Index: head/ObsoleteFiles.inc
===================================================================
--- head/ObsoleteFiles.inc (revision 265924)
+++ head/ObsoleteFiles.inc (revision 265925)
@@ -1,6316 +1,6353 @@
#
# $FreeBSD$
#
# This file lists old files (OLD_FILES), libraries (OLD_LIBS) and
# directories (OLD_DIRS) which should be removed on an update. Recently
# removed entries come first (with the date as a comment). Dynamic libraries
# are special-cased (OLD_LIBS). Static libraries and the generic links to
# the dynamic libraries (lib*.so) should be treated as normal files
# (OLD_FILES); unless you know of a reason to make an exception, consider
# this a must.
#
# In the case of a complete directory hierarchy, entries are sorted in
# depth-first order.
#
# The file is partitioned: OLD_FILES first, then OLD_LIBS and OLD_DIRS last.
#
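# As a purely hypothetical sketch (made-up date and paths, not a real
# removal), a dated entry typically looks like this; it is kept commented
# out here so it has no effect:
# # 20140601: removal of the imaginary libfoo
# OLD_FILES+=usr/include/foo/foo.h
# OLD_FILES+=usr/lib/libfoo.a
# OLD_FILES+=usr/lib/libfoo.so
# OLD_FILES+=usr/lib/libfoo_p.a
# OLD_LIBS+=usr/lib/libfoo.so.1
# OLD_DIRS+=usr/include/foo
#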
# Before you commit changes to this file, please check whether any entries
# in tools/build/mk/OptionalObsoleteFiles.inc can be removed. The following
# command reports files that are listed more than once, ignoring any
# architecture-specific conditionals, so you cannot blindly trust the
# output:
# ( grep '+=' /usr/src/ObsoleteFiles.inc | sort -u ; \
# grep '+=' /usr/src/tools/build/mk/OptionalObsoleteFiles.inc | sort -u) | \
# sort | uniq -d
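# (A file that legitimately appears under two different .if ${TARGET_ARCH}
# conditionals, for instance, will show up in this output as a false
# positive.)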
#
# To find regular duplicates that do not depend on optional components, you
# can also use something that will not produce false positives, e.g.:
# for t in `make -V TARGETS universe`; do
# __MAKE_CONF=/dev/null make -f Makefile.inc1 TARGET=$t \
# -V OLD_FILES -V OLD_LIBS -V OLD_DIRS check-old | \
# xargs -n1 | sort | uniq -d;
# done
#
# For optional components, you can use the following to see if any entries
# in OptionalObsoleteFiles.inc have been obsoleted by ObsoleteFiles.inc:
# for o in tools/build/options/WITH*; do
# __MAKE_CONF=/dev/null make -f Makefile.inc1 -D${o##*/} \
# -V OLD_FILES -V OLD_LIBS -V OLD_DIRS check-old | \
# xargs -n1 | sort | uniq -d;
# done
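#
# These lists are consumed by the check-old and delete-old family of targets
# in the top-level makefiles, e.g. (from /usr/src):
# make check-old        # report obsolete files, libraries and directories
# make delete-old       # remove obsolete files and directories
# make delete-old-libs  # remove obsolete libraries
#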
+# 20140512: new clang import which bumps version from 3.4 to 3.4.1.
+OLD_FILES+=usr/include/clang/3.4/__wmmintrin_aes.h
+OLD_FILES+=usr/include/clang/3.4/__wmmintrin_pclmul.h
+OLD_FILES+=usr/include/clang/3.4/altivec.h
+OLD_FILES+=usr/include/clang/3.4/ammintrin.h
+OLD_FILES+=usr/include/clang/3.4/avx2intrin.h
+OLD_FILES+=usr/include/clang/3.4/avxintrin.h
+OLD_FILES+=usr/include/clang/3.4/bmi2intrin.h
+OLD_FILES+=usr/include/clang/3.4/bmiintrin.h
+OLD_FILES+=usr/include/clang/3.4/cpuid.h
+OLD_FILES+=usr/include/clang/3.4/emmintrin.h
+OLD_FILES+=usr/include/clang/3.4/f16cintrin.h
+OLD_FILES+=usr/include/clang/3.4/fma4intrin.h
+OLD_FILES+=usr/include/clang/3.4/fmaintrin.h
+OLD_FILES+=usr/include/clang/3.4/immintrin.h
+OLD_FILES+=usr/include/clang/3.4/lzcntintrin.h
+OLD_FILES+=usr/include/clang/3.4/mm3dnow.h
+OLD_FILES+=usr/include/clang/3.4/mm_malloc.h
+OLD_FILES+=usr/include/clang/3.4/mmintrin.h
+OLD_FILES+=usr/include/clang/3.4/module.map
+OLD_FILES+=usr/include/clang/3.4/nmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/pmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/popcntintrin.h
+OLD_FILES+=usr/include/clang/3.4/prfchwintrin.h
+OLD_FILES+=usr/include/clang/3.4/rdseedintrin.h
+OLD_FILES+=usr/include/clang/3.4/rtmintrin.h
+OLD_FILES+=usr/include/clang/3.4/shaintrin.h
+OLD_FILES+=usr/include/clang/3.4/smmintrin.h
+OLD_FILES+=usr/include/clang/3.4/tbmintrin.h
+OLD_FILES+=usr/include/clang/3.4/tmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/wmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/x86intrin.h
+OLD_FILES+=usr/include/clang/3.4/xmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/xopintrin.h
+OLD_FILES+=usr/include/clang/3.4/arm_neon.h
+OLD_DIRS+=usr/include/clang/3.4
# 20140505: Bogusly installing src.opts.mk
OLD_FILES+=usr/share/mk/src.opts.mk
# 20140505: Reject PR kern/187551
OLD_DIRS+=usr/tests/sbin/ifconfig
OLD_FILES+=usr/tests/sbin/ifconfig/Kyuafile
OLD_FILES+=usr/tests/sbin/ifconfig/fibs_test
# 20140502: Removal of lindev(4)
OLD_FILES+=usr/share/man/man4/lindev.4.gz
# 20140314: AppleTalk
OLD_DIRS+=usr/include/netatalk
OLD_FILES+=usr/include/netatalk/aarp.h
OLD_FILES+=usr/include/netatalk/at.h
OLD_FILES+=usr/include/netatalk/at_extern.h
OLD_FILES+=usr/include/netatalk/at_var.h
OLD_FILES+=usr/include/netatalk/ddp.h
OLD_FILES+=usr/include/netatalk/ddp_pcb.h
OLD_FILES+=usr/include/netatalk/ddp_var.h
OLD_FILES+=usr/include/netatalk/endian.h
OLD_FILES+=usr/include/netatalk/phase2.h
# 20140314: Remove IPX/SPX
OLD_LIBS+=lib/libipx.so.5
OLD_FILES+=usr/include/netipx/ipx.h
OLD_FILES+=usr/include/netipx/ipx_if.h
OLD_FILES+=usr/include/netipx/ipx_pcb.h
OLD_FILES+=usr/include/netipx/ipx_var.h
OLD_FILES+=usr/include/netipx/spx.h
OLD_FILES+=usr/include/netipx/spx_debug.h
OLD_FILES+=usr/include/netipx/spx_timer.h
OLD_FILES+=usr/include/netipx/spx_var.h
OLD_DIRS+=usr/include/netipx
OLD_FILES+=usr/lib/libipx.a
OLD_FILES+=usr/lib/libipx.so
OLD_FILES+=usr/lib/libipx_p.a
OLD_FILES+=usr/lib32/libipx.a
OLD_FILES+=usr/lib32/libipx.so
OLD_LIBS+=usr/lib32/libipx.so.5
OLD_FILES+=usr/lib32/libipx_p.a
OLD_FILES+=usr/sbin/IPXrouted
OLD_FILES+=usr/share/man/man3/ipx.3.gz
OLD_FILES+=usr/share/man/man3/ipx_addr.3.gz
OLD_FILES+=usr/share/man/man3/ipx_ntoa.3.gz
OLD_FILES+=usr/share/man/man4/ef.4.gz
OLD_FILES+=usr/share/man/man4/if_ef.4.gz
OLD_FILES+=usr/share/man/man8/IPXrouted.8.gz
# 20140314: bsdconfig usermgmt rewrite
OLD_FILES+=usr/libexec/bsdconfig/070.usermgmt/userinput
# 20140307: bsdconfig groupmgmt rewrite
OLD_FILES+=usr/libexec/bsdconfig/070.usermgmt/groupinput
# 20140223: Remove libyaml
OLD_FILES+=usr/lib/private/libyaml.a
OLD_FILES+=usr/lib/private/libyaml.so
OLD_FILES+=usr/lib/private/libyaml.so.1
OLD_FILES+=usr/lib/private/libyaml_p.a
# 20140216: new clang import which bumps version from 3.3 to 3.4.
OLD_FILES+=usr/bin/llvm-prof
OLD_FILES+=usr/bin/llvm-ranlib
OLD_FILES+=usr/include/clang/3.3/__wmmintrin_aes.h
OLD_FILES+=usr/include/clang/3.3/__wmmintrin_pclmul.h
OLD_FILES+=usr/include/clang/3.3/altivec.h
OLD_FILES+=usr/include/clang/3.3/ammintrin.h
OLD_FILES+=usr/include/clang/3.3/avx2intrin.h
OLD_FILES+=usr/include/clang/3.3/avxintrin.h
OLD_FILES+=usr/include/clang/3.3/bmi2intrin.h
OLD_FILES+=usr/include/clang/3.3/bmiintrin.h
OLD_FILES+=usr/include/clang/3.3/cpuid.h
OLD_FILES+=usr/include/clang/3.3/emmintrin.h
OLD_FILES+=usr/include/clang/3.3/f16cintrin.h
OLD_FILES+=usr/include/clang/3.3/fma4intrin.h
OLD_FILES+=usr/include/clang/3.3/fmaintrin.h
OLD_FILES+=usr/include/clang/3.3/immintrin.h
OLD_FILES+=usr/include/clang/3.3/lzcntintrin.h
OLD_FILES+=usr/include/clang/3.3/mm3dnow.h
OLD_FILES+=usr/include/clang/3.3/mm_malloc.h
OLD_FILES+=usr/include/clang/3.3/mmintrin.h
OLD_FILES+=usr/include/clang/3.3/module.map
OLD_FILES+=usr/include/clang/3.3/nmmintrin.h
OLD_FILES+=usr/include/clang/3.3/pmmintrin.h
OLD_FILES+=usr/include/clang/3.3/popcntintrin.h
OLD_FILES+=usr/include/clang/3.3/prfchwintrin.h
OLD_FILES+=usr/include/clang/3.3/rdseedintrin.h
OLD_FILES+=usr/include/clang/3.3/rtmintrin.h
OLD_FILES+=usr/include/clang/3.3/smmintrin.h
OLD_FILES+=usr/include/clang/3.3/tmmintrin.h
OLD_FILES+=usr/include/clang/3.3/wmmintrin.h
OLD_FILES+=usr/include/clang/3.3/x86intrin.h
OLD_FILES+=usr/include/clang/3.3/xmmintrin.h
OLD_FILES+=usr/include/clang/3.3/xopintrin.h
OLD_FILES+=usr/share/man/man1/llvm-prof.1.gz
OLD_FILES+=usr/share/man/man1/llvm-ranlib.1.gz
OLD_DIRS+=usr/include/clang/3.3
# 20140216: nve(4) removed
OLD_FILES+=usr/share/man/man4/if_nve.4.gz
OLD_FILES+=usr/share/man/man4/nve.4.gz
# 20140205: Open Firmware device moved
OLD_FILES+=usr/include/dev/ofw/ofw_nexus.h
# 20140128: libelf and libdwarf import
OLD_LIBS+=usr/lib/libelf.so.1
OLD_LIBS+=usr/lib32/libelf.so.1
OLD_LIBS+=usr/lib/libdwarf.so.3
OLD_LIBS+=usr/lib32/libdwarf.so.3
# 20140123: apicvar header moved to x86
OLD_FILES+=usr/include/machine/apicvar.h
# 20131215: libcam version bumped
OLD_LIBS+=lib/libcam.so.6
OLD_LIBS+=usr/lib32/libcam.so.6
# 20131202: libcapsicum and libcasper moved to /lib/
OLD_LIBS+=usr/lib/libcapsicum.so.0
OLD_LIBS+=usr/lib/libcasper.so.0
# 20131109: extattr(2) mlinks fixed
OLD_FILES+=usr/share/man/man2/extattr_delete_list.2.gz
OLD_FILES+=usr/share/man/man2/extattr_get_list.2.gz
# 20131107: example files removed
OLD_FILES+=usr/share/examples/libusb20/aux.c
OLD_FILES+=usr/share/examples/libusb20/aux.h
# 20131105: tzdata 2013h import
OLD_FILES+=usr/share/zoneinfo/America/Shiprock
OLD_FILES+=usr/share/zoneinfo/Antarctica/South_Pole
# 20131103: WITH_LIBICONV_COMPAT removal
OLD_FILES+=usr/include/_libiconv_compat.h
OLD_FILES+=usr/lib/libiconv.a
OLD_FILES+=usr/lib/libiconv.so
OLD_FILES+=usr/lib/libiconv.so.3
OLD_FILES+=usr/lib/libiconv_p.a
OLD_FILES+=usr/lib32/libiconv.a
OLD_FILES+=usr/lib32/libiconv.so
OLD_FILES+=usr/lib32/libiconv.so.3
OLD_FILES+=usr/lib32/libiconv_p.a
# 20131103: removal of utxrm(8), use 'utx rm' instead.
OLD_FILES+=usr/sbin/utxrm
OLD_FILES+=usr/share/man/man8/utxrm.8.gz
# 20131031: pkg_install has been removed
OLD_FILES+=etc/periodic/daily/220.backup-pkgdb
OLD_FILES+=etc/periodic/daily/490.status-pkg-changes
OLD_FILES+=etc/periodic/security/460.chkportsum
OLD_FILES+=etc/periodic/weekly/400.status-pkg
OLD_FILES+=usr/sbin/pkg_add
OLD_FILES+=usr/sbin/pkg_create
OLD_FILES+=usr/sbin/pkg_delete
OLD_FILES+=usr/sbin/pkg_info
OLD_FILES+=usr/sbin/pkg_updating
OLD_FILES+=usr/sbin/pkg_version
OLD_FILES+=usr/share/man/man1/pkg_add.1.gz
OLD_FILES+=usr/share/man/man1/pkg_create.1.gz
OLD_FILES+=usr/share/man/man1/pkg_delete.1.gz
OLD_FILES+=usr/share/man/man1/pkg_info.1.gz
OLD_FILES+=usr/share/man/man1/pkg_updating.1.gz
OLD_FILES+=usr/share/man/man1/pkg_version.1.gz
# 20131030: /etc/keys moved to /usr/share/keys
OLD_DIRS+=etc/keys
OLD_DIRS+=etc/keys/pkg
OLD_DIRS+=etc/keys/pkg/revoked
OLD_DIRS+=etc/keys/pkg/trusted
OLD_FILES+=etc/keys/pkg/trusted/pkg.freebsd.org.2013102301
# 20131028: ng_fec(4) removed
OLD_FILES+=usr/include/netgraph/ng_fec.h
OLD_FILES+=usr/share/man/man4/ng_fec.4.gz
# 20131027: header moved
OLD_FILES+=usr/include/net/pf_mtag.h
# 20131023: remove never-used iscsi directory
OLD_DIRS+=usr/share/examples/iscsi
# 20131021: isf(4) removed
OLD_FILES+=usr/sbin/isfctl
OLD_FILES+=usr/share/man/man4/isf.4.gz
OLD_FILES+=usr/share/man/man8/isfctl.8.gz
# 20131014: libbsdyml becomes private
OLD_FILES+=usr/lib/libbsdyml.a
OLD_FILES+=usr/lib/libbsdyml.so
OLD_LIBS+=usr/lib/libbsdyml.so.0
OLD_FILES+=usr/lib/libbsdyml_p.a
OLD_FILES+=usr/lib32/libbsdyml.a
OLD_FILES+=usr/lib32/libbsdyml.so
OLD_LIBS+=usr/lib32/libbsdyml.so.0
OLD_FILES+=usr/lib32/libbsdyml_p.a
OLD_FILES+=usr/share/man/man3/libbsdyml.3.gz
OLD_FILES+=usr/include/bsdyml.h
# 20131013: Removal of the ATF tools
OLD_FILES+=etc/atf/FreeBSD.conf
OLD_FILES+=etc/atf/atf-run.hooks
OLD_FILES+=etc/atf/common.conf
OLD_FILES+=usr/bin/atf-config
OLD_FILES+=usr/bin/atf-report
OLD_FILES+=usr/bin/atf-run
OLD_FILES+=usr/bin/atf-version
OLD_FILES+=usr/share/atf/atf-run.hooks
OLD_FILES+=usr/share/examples/atf/atf-run.hooks
OLD_FILES+=usr/share/examples/atf/tests-results.css
OLD_FILES+=usr/share/man/man1/atf-config.1.gz
OLD_FILES+=usr/share/man/man1/atf-report.1.gz
OLD_FILES+=usr/share/man/man1/atf-run.1.gz
OLD_FILES+=usr/share/man/man1/atf-version.1.gz
OLD_FILES+=usr/share/man/man5/atf-formats.5.gz
OLD_FILES+=usr/share/man/man7/atf.7.gz
OLD_FILES+=usr/share/xml/atf/tests-results.dtd
OLD_FILES+=usr/share/xsl/atf/tests-results.xsl
# 20131009: freebsd-version moved from /libexec to /bin
OLD_FILES+=libexec/freebsd-version
# 20131001: ar and ranlib from binutils not used
OLD_FILES+=usr/bin/gnu-ar
OLD_FILES+=usr/bin/gnu-ranlib
OLD_FILES+=usr/share/man/man1/gnu-ar.1.gz
OLD_FILES+=usr/share/man/man1/gnu-ranlib.1.gz
# 20130930: BIND removed from base
OLD_FILES+=etc/mtree/BIND.chroot.dist
OLD_FILES+=etc/namedb
OLD_FILES+=etc/periodic/daily/470.status-named
OLD_FILES+=usr/bin/dig
OLD_FILES+=usr/bin/nslookup
OLD_FILES+=usr/bin/nsupdate
OLD_DIRS+=usr/include/lwres
OLD_FILES+=usr/include/lwres/context.h
OLD_FILES+=usr/include/lwres/int.h
OLD_FILES+=usr/include/lwres/ipv6.h
OLD_FILES+=usr/include/lwres/lang.h
OLD_FILES+=usr/include/lwres/list.h
OLD_FILES+=usr/include/lwres/lwbuffer.h
OLD_FILES+=usr/include/lwres/lwpacket.h
OLD_FILES+=usr/include/lwres/lwres.h
OLD_FILES+=usr/include/lwres/net.h
OLD_FILES+=usr/include/lwres/netdb.h
OLD_FILES+=usr/include/lwres/platform.h
OLD_FILES+=usr/include/lwres/result.h
OLD_FILES+=usr/include/lwres/version.h
OLD_FILES+=usr/lib/liblwres.a
OLD_FILES+=usr/lib/liblwres.so
OLD_LIBS+=usr/lib/liblwres.so.90
OLD_FILES+=usr/lib/liblwres_p.a
OLD_FILES+=usr/sbin/arpaname
OLD_FILES+=usr/sbin/ddns-confgen
OLD_FILES+=usr/sbin/dnssec-dsfromkey
OLD_FILES+=usr/sbin/dnssec-keyfromlabel
OLD_FILES+=usr/sbin/dnssec-keygen
OLD_FILES+=usr/sbin/dnssec-revoke
OLD_FILES+=usr/sbin/dnssec-settime
OLD_FILES+=usr/sbin/dnssec-signzone
OLD_FILES+=usr/sbin/dnssec-verify
OLD_FILES+=usr/sbin/genrandom
OLD_FILES+=usr/sbin/isc-hmac-fixup
OLD_FILES+=usr/sbin/lwresd
OLD_FILES+=usr/sbin/named
OLD_FILES+=usr/sbin/named-checkconf
OLD_FILES+=usr/sbin/named-checkzone
OLD_FILES+=usr/sbin/named-compilezone
OLD_FILES+=usr/sbin/named-journalprint
OLD_FILES+=usr/sbin/named.reconfig
OLD_FILES+=usr/sbin/named.reload
OLD_FILES+=usr/sbin/nsec3hash
OLD_FILES+=usr/sbin/rndc
OLD_FILES+=usr/sbin/rndc-confgen
OLD_DIRS+=usr/share/doc/bind9
OLD_FILES+=usr/share/doc/bind9/CHANGES
OLD_FILES+=usr/share/doc/bind9/COPYRIGHT
OLD_FILES+=usr/share/doc/bind9/FAQ
OLD_FILES+=usr/share/doc/bind9/HISTORY
OLD_FILES+=usr/share/doc/bind9/README
OLD_DIRS+=usr/share/doc/bind9/arm
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch01.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch02.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch03.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch04.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch05.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch06.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch07.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch08.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch09.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.ch10.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.html
OLD_FILES+=usr/share/doc/bind9/arm/Bv9ARM.pdf
OLD_FILES+=usr/share/doc/bind9/arm/man.arpaname.html
OLD_FILES+=usr/share/doc/bind9/arm/man.ddns-confgen.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dig.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-dsfromkey.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-keyfromlabel.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-keygen.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-revoke.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-settime.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-signzone.html
OLD_FILES+=usr/share/doc/bind9/arm/man.dnssec-verify.html
OLD_FILES+=usr/share/doc/bind9/arm/man.genrandom.html
OLD_FILES+=usr/share/doc/bind9/arm/man.host.html
OLD_FILES+=usr/share/doc/bind9/arm/man.isc-hmac-fixup.html
OLD_FILES+=usr/share/doc/bind9/arm/man.named-checkconf.html
OLD_FILES+=usr/share/doc/bind9/arm/man.named-checkzone.html
OLD_FILES+=usr/share/doc/bind9/arm/man.named-journalprint.html
OLD_FILES+=usr/share/doc/bind9/arm/man.named.html
OLD_FILES+=usr/share/doc/bind9/arm/man.nsec3hash.html
OLD_FILES+=usr/share/doc/bind9/arm/man.nsupdate.html
OLD_FILES+=usr/share/doc/bind9/arm/man.rndc-confgen.html
OLD_FILES+=usr/share/doc/bind9/arm/man.rndc.conf.html
OLD_FILES+=usr/share/doc/bind9/arm/man.rndc.html
OLD_DIRS+=usr/share/doc/bind9/misc
OLD_FILES+=usr/share/doc/bind9/misc/dnssec
OLD_FILES+=usr/share/doc/bind9/misc/format-options.pl
OLD_FILES+=usr/share/doc/bind9/misc/ipv6
OLD_FILES+=usr/share/doc/bind9/misc/migration
OLD_FILES+=usr/share/doc/bind9/misc/migration-4to9
OLD_FILES+=usr/share/doc/bind9/misc/options
OLD_FILES+=usr/share/doc/bind9/misc/rfc-compliance
OLD_FILES+=usr/share/doc/bind9/misc/roadmap
OLD_FILES+=usr/share/doc/bind9/misc/sdb
OLD_FILES+=usr/share/doc/bind9/misc/sort-options.pl
OLD_FILES+=usr/share/man/man1/arpaname.1.gz
OLD_FILES+=usr/share/man/man1/dig.1.gz
OLD_FILES+=usr/share/man/man1/nslookup.1.gz
OLD_FILES+=usr/share/man/man1/nsupdate.1.gz
OLD_FILES+=usr/share/man/man3/lwres.3.gz
OLD_FILES+=usr/share/man/man3/lwres_addr_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_add.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_back.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_clear.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_first.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_forward.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_getmem.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_getuint16.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_getuint32.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_getuint8.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_init.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_invalidate.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_putmem.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_putuint16.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_putuint32.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_putuint8.3.gz
OLD_FILES+=usr/share/man/man3/lwres_buffer_subtract.3.gz
OLD_FILES+=usr/share/man/man3/lwres_conf_clear.3.gz
OLD_FILES+=usr/share/man/man3/lwres_conf_get.3.gz
OLD_FILES+=usr/share/man/man3/lwres_conf_init.3.gz
OLD_FILES+=usr/share/man/man3/lwres_conf_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_conf_print.3.gz
OLD_FILES+=usr/share/man/man3/lwres_config.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_allocmem.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_create.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_destroy.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_freemem.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_initserial.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_nextserial.3.gz
OLD_FILES+=usr/share/man/man3/lwres_context_sendrecv.3.gz
OLD_FILES+=usr/share/man/man3/lwres_endhostent.3.gz
OLD_FILES+=usr/share/man/man3/lwres_endhostent_r.3.gz
OLD_FILES+=usr/share/man/man3/lwres_freeaddrinfo.3.gz
OLD_FILES+=usr/share/man/man3/lwres_freehostent.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabn.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabnrequest_free.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabnrequest_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabnrequest_render.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabnresponse_free.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabnresponse_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gabnresponse_render.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gai_strerror.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getaddrinfo.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getaddrsbyname.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostbyaddr.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostbyaddr_r.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostbyname.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostbyname2.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostbyname_r.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostent.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gethostent_r.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getipnode.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getipnodebyaddr.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getipnodebyname.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getnamebyaddr.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getnameinfo.3.gz
OLD_FILES+=usr/share/man/man3/lwres_getrrsetbyname.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnba.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnbarequest_free.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnbarequest_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnbarequest_render.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnbaresponse_free.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnbaresponse_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_gnbaresponse_render.3.gz
OLD_FILES+=usr/share/man/man3/lwres_herror.3.gz
OLD_FILES+=usr/share/man/man3/lwres_hstrerror.3.gz
OLD_FILES+=usr/share/man/man3/lwres_inetntop.3.gz
OLD_FILES+=usr/share/man/man3/lwres_lwpacket_parseheader.3.gz
OLD_FILES+=usr/share/man/man3/lwres_lwpacket_renderheader.3.gz
OLD_FILES+=usr/share/man/man3/lwres_net_ntop.3.gz
OLD_FILES+=usr/share/man/man3/lwres_noop.3.gz
OLD_FILES+=usr/share/man/man3/lwres_nooprequest_free.3.gz
OLD_FILES+=usr/share/man/man3/lwres_nooprequest_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_nooprequest_render.3.gz
OLD_FILES+=usr/share/man/man3/lwres_noopresponse_free.3.gz
OLD_FILES+=usr/share/man/man3/lwres_noopresponse_parse.3.gz
OLD_FILES+=usr/share/man/man3/lwres_noopresponse_render.3.gz
OLD_FILES+=usr/share/man/man3/lwres_packet.3.gz
OLD_FILES+=usr/share/man/man3/lwres_resutil.3.gz
OLD_FILES+=usr/share/man/man3/lwres_sethostent.3.gz
OLD_FILES+=usr/share/man/man3/lwres_sethostent_r.3.gz
OLD_FILES+=usr/share/man/man3/lwres_string_parse.3.gz
OLD_FILES+=usr/share/man/man5/named.conf.5.gz
OLD_FILES+=usr/share/man/man5/rndc.conf.5.gz
OLD_FILES+=usr/share/man/man8/ddns-confgen.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-dsfromkey.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-keyfromlabel.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-keygen.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-revoke.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-settime.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-signzone.8.gz
OLD_FILES+=usr/share/man/man8/dnssec-verify.8.gz
OLD_FILES+=usr/share/man/man8/genrandom.8.gz
OLD_FILES+=usr/share/man/man8/isc-hmac-fixup.8.gz
OLD_FILES+=usr/share/man/man8/lwresd.8.gz
OLD_FILES+=usr/share/man/man8/named-checkconf.8.gz
OLD_FILES+=usr/share/man/man8/named-checkzone.8.gz
OLD_FILES+=usr/share/man/man8/named-compilezone.8.gz
OLD_FILES+=usr/share/man/man8/named-journalprint.8.gz
OLD_FILES+=usr/share/man/man8/named.8.gz
OLD_FILES+=usr/share/man/man8/named.reconfig.8.gz
OLD_FILES+=usr/share/man/man8/named.reload.8.gz
OLD_FILES+=usr/share/man/man8/nsec3hash.8.gz
OLD_FILES+=usr/share/man/man8/rndc-confgen.8.gz
OLD_FILES+=usr/share/man/man8/rndc.8.gz
OLD_DIRS+=var/named/dev
OLD_DIRS+=var/named/etc
OLD_DIRS+=var/named/etc/namedb
OLD_FILES+=var/named/etc/namedb/PROTO.localhost-v6.rev
OLD_FILES+=var/named/etc/namedb/PROTO.localhost.rev
OLD_DIRS+=var/named/etc/namedb/dynamic
OLD_FILES+=var/named/etc/namedb/make-localhost
OLD_DIRS+=var/named/etc/namedb/master
OLD_FILES+=var/named/etc/namedb/master/empty.db
OLD_FILES+=var/named/etc/namedb/master/localhost-forward.db
OLD_FILES+=var/named/etc/namedb/master/localhost-reverse.db
#OLD_FILES+=var/named/etc/namedb/named.conf # intentionally left out
OLD_FILES+=var/named/etc/namedb/named.root
OLD_DIRS+=var/named/etc/namedb/slave
OLD_DIRS+=var/named/var
OLD_DIRS+=var/named/var/dump
OLD_DIRS+=var/named/var/log
OLD_DIRS+=var/named/var/run
OLD_DIRS+=var/named/var/run/named
OLD_DIRS+=var/named/var/stats
OLD_DIRS+=var/run/named
# 20130923: example moved
OLD_FILES+=usr/share/examples/bsdconfig/browse_packages.sh
# 20130908: libssh becomes private
OLD_FILES+=usr/lib/libssh.a
OLD_FILES+=usr/lib/libssh.so
OLD_LIBS+=usr/lib/libssh.so.5
OLD_FILES+=usr/lib/libssh_p.a
OLD_FILES+=usr/lib32/libssh.a
OLD_FILES+=usr/lib32/libssh.so
OLD_LIBS+=usr/lib32/libssh.so.5
OLD_FILES+=usr/lib32/libssh_p.a
# 20130903: gnupatch is no more
OLD_FILES+=usr/bin/gnupatch
OLD_FILES+=usr/share/man/man1/gnupatch.1.gz
# 20130829: bsdpatch is now unconditionally installed as patch
OLD_FILES+=usr/bin/bsdpatch
OLD_FILES+=usr/share/man/man1/bsdpatch.1.gz
# 20130822: bind 9.9.3-P2 import
OLD_LIBS+=usr/lib/liblwres.so.80
# 20130814: vm_page_busy(9)
OLD_FILES+=usr/share/man/man9/vm_page_flash.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_io.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_io_finish.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_io_start.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_wakeup.9.gz
# 20130710: libkvm version bump
OLD_LIBS+=lib/libkvm.so.5
OLD_LIBS+=usr/lib32/libkvm.so.5
# 20130623: dialog update from 1.1 to 1.2
OLD_LIBS+=usr/lib/libdialog.so.7
OLD_LIBS+=usr/lib32/libdialog.so.7
# 20130616: vfs_mount.9 removed
OLD_FILES+=usr/share/man/man9/vfs_mount.9.gz
# 20130614: remove CVS from base
OLD_FILES+=usr/bin/cvs
OLD_FILES+=usr/bin/cvsbug
OLD_FILES+=usr/share/doc/psd/28.cvs/paper.ascii.gz
OLD_DIRS+=usr/share/doc/psd/28.cvs
OLD_FILES+=usr/share/examples/cvs/contrib/README
OLD_FILES+=usr/share/examples/cvs/contrib/clmerge
OLD_FILES+=usr/share/examples/cvs/contrib/cln_hist
OLD_FILES+=usr/share/examples/cvs/contrib/commit_prep
OLD_FILES+=usr/share/examples/cvs/contrib/cvs2vendor
OLD_FILES+=usr/share/examples/cvs/contrib/cvs_acls
OLD_FILES+=usr/share/examples/cvs/contrib/cvscheck
OLD_FILES+=usr/share/examples/cvs/contrib/cvscheck.man
OLD_FILES+=usr/share/examples/cvs/contrib/cvshelp.man
OLD_FILES+=usr/share/examples/cvs/contrib/descend.man
OLD_FILES+=usr/share/examples/cvs/contrib/easy-import
OLD_FILES+=usr/share/examples/cvs/contrib/intro.doc
OLD_FILES+=usr/share/examples/cvs/contrib/log
OLD_FILES+=usr/share/examples/cvs/contrib/log_accum
OLD_FILES+=usr/share/examples/cvs/contrib/mfpipe
OLD_FILES+=usr/share/examples/cvs/contrib/rcs-to-cvs
OLD_FILES+=usr/share/examples/cvs/contrib/rcs2log
OLD_FILES+=usr/share/examples/cvs/contrib/rcslock
OLD_FILES+=usr/share/examples/cvs/contrib/sccs2rcs
OLD_DIRS+=usr/share/examples/cvs/contrib
OLD_DIRS+=usr/share/examples/cvs
OLD_FILES+=usr/share/info/cvs.info.gz
OLD_FILES+=usr/share/info/cvsclient.info.gz
OLD_FILES+=usr/share/man/man1/cvs.1.gz
OLD_FILES+=usr/share/man/man5/cvs.5.gz
OLD_FILES+=usr/share/man/man8/cvsbug.8.gz
# 20130607: WITH_DEBUG_FILES added
OLD_FILES+=lib/libufs.so.6.symbols
OLD_FILES+=usr/lib32/libufs.so.6.symbols
# 20130417: nfs fha moved from nfsserver to nfs
OLD_FILES+=usr/include/nfsserver/nfs_fha.h
# 20130411: new clang import which bumps version from 3.2 to 3.3.
OLD_FILES+=usr/include/clang/3.2/__wmmintrin_aes.h
OLD_FILES+=usr/include/clang/3.2/__wmmintrin_pclmul.h
OLD_FILES+=usr/include/clang/3.2/altivec.h
OLD_FILES+=usr/include/clang/3.2/ammintrin.h
OLD_FILES+=usr/include/clang/3.2/avx2intrin.h
OLD_FILES+=usr/include/clang/3.2/avxintrin.h
OLD_FILES+=usr/include/clang/3.2/bmi2intrin.h
OLD_FILES+=usr/include/clang/3.2/bmiintrin.h
OLD_FILES+=usr/include/clang/3.2/cpuid.h
OLD_FILES+=usr/include/clang/3.2/emmintrin.h
OLD_FILES+=usr/include/clang/3.2/f16cintrin.h
OLD_FILES+=usr/include/clang/3.2/fma4intrin.h
OLD_FILES+=usr/include/clang/3.2/fmaintrin.h
OLD_FILES+=usr/include/clang/3.2/immintrin.h
OLD_FILES+=usr/include/clang/3.2/lzcntintrin.h
OLD_FILES+=usr/include/clang/3.2/mm3dnow.h
OLD_FILES+=usr/include/clang/3.2/mm_malloc.h
OLD_FILES+=usr/include/clang/3.2/mmintrin.h
OLD_FILES+=usr/include/clang/3.2/module.map
OLD_FILES+=usr/include/clang/3.2/nmmintrin.h
OLD_FILES+=usr/include/clang/3.2/pmmintrin.h
OLD_FILES+=usr/include/clang/3.2/popcntintrin.h
OLD_FILES+=usr/include/clang/3.2/rtmintrin.h
OLD_FILES+=usr/include/clang/3.2/smmintrin.h
OLD_FILES+=usr/include/clang/3.2/tmmintrin.h
OLD_FILES+=usr/include/clang/3.2/wmmintrin.h
OLD_FILES+=usr/include/clang/3.2/x86intrin.h
OLD_FILES+=usr/include/clang/3.2/xmmintrin.h
OLD_FILES+=usr/include/clang/3.2/xopintrin.h
OLD_DIRS+=usr/include/clang/3.2
# 20130404: legacy ATA stack removed
OLD_FILES+=etc/periodic/daily/405.status-ata-raid
OLD_FILES+=rescue/atacontrol
OLD_FILES+=sbin/atacontrol
OLD_FILES+=usr/share/man/man8/atacontrol.8.gz
OLD_FILES+=usr/share/man/man4/atapicam.4.gz
OLD_FILES+=usr/share/man/man4/ataraid.4.gz
OLD_FILES+=usr/sbin/burncd
OLD_FILES+=usr/share/man/man8/burncd.8.gz
# 20130316: vinum.4 removed
OLD_FILES+=usr/share/man/man4/vinum.4.gz
# 20130312: fortunes-o removed
OLD_FILES+=usr/share/games/fortune/fortunes-o
OLD_FILES+=usr/share/games/fortune/fortunes-o.dat
# 20130311: Ports are no longer available via cvsup
OLD_FILES+=usr/share/examples/cvsup/ports-supfile
OLD_FILES+=usr/share/examples/cvsup/refuse
OLD_FILES+=usr/share/examples/cvsup/refuse.README
# 20130309: NWFS and NCP support removed
OLD_FILES+=usr/bin/ncplist
OLD_FILES+=usr/bin/ncplogin
OLD_FILES+=usr/bin/ncplogout
OLD_FILES+=usr/include/fs/nwfs/nwfs.h
OLD_FILES+=usr/include/fs/nwfs/nwfs_mount.h
OLD_FILES+=usr/include/fs/nwfs/nwfs_node.h
OLD_FILES+=usr/include/fs/nwfs/nwfs_subr.h
OLD_DIRS+=usr/include/fs/nwfs
OLD_FILES+=usr/include/netncp/ncp.h
OLD_FILES+=usr/include/netncp/ncp_cfg.h
OLD_FILES+=usr/include/netncp/ncp_conn.h
OLD_FILES+=usr/include/netncp/ncp_file.h
OLD_FILES+=usr/include/netncp/ncp_lib.h
OLD_FILES+=usr/include/netncp/ncp_ncp.h
OLD_FILES+=usr/include/netncp/ncp_nls.h
OLD_FILES+=usr/include/netncp/ncp_rcfile.h
OLD_FILES+=usr/include/netncp/ncp_rq.h
OLD_FILES+=usr/include/netncp/ncp_sock.h
OLD_FILES+=usr/include/netncp/ncp_subr.h
OLD_FILES+=usr/include/netncp/ncp_user.h
OLD_FILES+=usr/include/netncp/ncpio.h
OLD_FILES+=usr/include/netncp/nwerror.h
OLD_DIRS+=usr/include/netncp
OLD_FILES+=usr/lib/libncp.a
OLD_FILES+=usr/lib/libncp.so
OLD_LIBS+=usr/lib/libncp.so.4
OLD_FILES+=usr/lib/libncp_p.a
OLD_FILES+=usr/lib32/libncp.a
OLD_FILES+=usr/lib32/libncp.so
OLD_LIBS+=usr/lib32/libncp.so.4
OLD_FILES+=usr/lib32/libncp_p.a
OLD_FILES+=usr/sbin/mount_nwfs
OLD_FILES+=usr/share/examples/nwclient/dot.nwfsrc
OLD_FILES+=usr/share/examples/nwclient/nwfs.sh.sample
OLD_DIRS+=usr/share/examples/nwclient
OLD_FILES+=usr/share/man/man1/ncplist.1.gz
OLD_FILES+=usr/share/man/man1/ncplogin.1.gz
OLD_FILES+=usr/share/man/man1/ncplogout.1.gz
OLD_FILES+=usr/share/man/man8/mount_nwfs.8.gz
# 20130302: NTFS support removed
OLD_FILES+=rescue/mount_ntfs
OLD_FILES+=sbin/mount_ntfs
OLD_FILES+=usr/include/fs/ntfs/ntfs.h
OLD_FILES+=usr/include/fs/ntfs/ntfs_compr.h
OLD_FILES+=usr/include/fs/ntfs/ntfs_ihash.h
OLD_FILES+=usr/include/fs/ntfs/ntfs_inode.h
OLD_FILES+=usr/include/fs/ntfs/ntfs_subr.h
OLD_FILES+=usr/include/fs/ntfs/ntfs_vfsops.h
OLD_FILES+=usr/include/fs/ntfs/ntfsmount.h
OLD_DIRS+=usr/include/fs/ntfs
OLD_FILES+=usr/share/man/man8/mount_ntfs.8.gz
# 20130302: PORTALFS support removed
OLD_FILES+=usr/include/fs/portalfs/portal.h
OLD_DIRS+=usr/include/fs/portalfs
OLD_FILES+=usr/sbin/mount_portalfs
OLD_FILES+=usr/share/examples/portal/README
OLD_FILES+=usr/share/examples/portal/portal.conf
OLD_DIRS+=usr/share/examples/portal
OLD_FILES+=usr/share/man/man8/mount_portalfs.8.gz
# 20130302: CODAFS support removed
OLD_FILES+=usr/share/man/man4/coda.4.gz
# 20130302: XFS support removed
OLD_FILES+=usr/share/man/man5/xfs.5.gz
# 20130302: Capsicum overhaul
OLD_FILES+=usr/share/man/man2/cap_getrights.2.gz
OLD_FILES+=usr/share/man/man2/cap_new.2.gz
# 20130213: OpenSSL 1.0.1e import
OLD_FILES+=usr/share/openssl/man/man3/EVP_PKEY_verifyrecover.3.gz
OLD_FILES+=usr/share/openssl/man/man3/EVP_PKEY_verifyrecover_init.3.gz
# 20130116: removed long-unused directories for .1aout section manpages
OLD_FILES+=usr/share/man/en.ISO8859-1/man1aout
OLD_FILES+=usr/share/man/en.UTF-8/man1aout
OLD_DIRS+=usr/share/man/man1aout
OLD_DIRS+=usr/share/man/cat1aout
OLD_DIRS+=usr/share/man/en.ISO8859-1/cat1aout
OLD_DIRS+=usr/share/man/en.UTF-8/cat1aout
# 20130110: bsd.compat.mk removed
OLD_FILES+=usr/share/mk/bsd.compat.mk
# 20130103: gnats-supfile removed
OLD_FILES+=usr/share/examples/cvsup/gnats-supfile
# 20121230: libdisk removed
OLD_FILES+=usr/share/man/man3/libdisk.3.gz
OLD_FILES+=usr/include/libdisk.h
OLD_FILES+=usr/lib/libdisk.a
OLD_FILES+=usr/lib32/libdisk.a
# 20121230: remove wrongly created directories for auditdistd
OLD_DIRS+=var/dist
OLD_DIRS+=var/remote
# 20121114: zpool-features manual page moved from section 5 to 7
OLD_FILES+=usr/share/man/man5/zpool-features.5.gz
# 20121022: remove harp, hfa and idt man page
OLD_FILES+=usr/share/man/man4/harp.4.gz
OLD_FILES+=usr/share/man/man4/hfa.4.gz
OLD_FILES+=usr/share/man/man4/idt.4.gz
OLD_FILES+=usr/share/man/man4/if_idt.4.gz
# 20121022: VFS_LOCK_GIANT elimination
OLD_FILES+=usr/share/man/man9/VFS_LOCK_GIANT.9.gz
OLD_FILES+=usr/share/man/man9/VFS_UNLOCK_GIANT.9.gz
# 20121004: remove incomplete unwind.h
OLD_FILES+=usr/include/clang/3.2/unwind.h
# 20120910: NetBSD compat shims removed
OLD_FILES+=usr/include/cam/scsi/scsi_low_pisa.h
OLD_FILES+=usr/include/sys/device_port.h
# 20120909: doc and www supfiles removed
OLD_FILES+=usr/share/examples/cvsup/doc-supfile
OLD_FILES+=usr/share/examples/cvsup/www-supfile
# 20120908: pf cleanup
OLD_FILES+=usr/include/net/if_pflow.h
# 20120816: new clang import which bumps version from 3.1 to 3.2
OLD_FILES+=usr/bin/llvm-ld
OLD_FILES+=usr/bin/llvm-stub
OLD_FILES+=usr/include/clang/3.1/altivec.h
OLD_FILES+=usr/include/clang/3.1/avx2intrin.h
OLD_FILES+=usr/include/clang/3.1/avxintrin.h
OLD_FILES+=usr/include/clang/3.1/bmi2intrin.h
OLD_FILES+=usr/include/clang/3.1/bmiintrin.h
OLD_FILES+=usr/include/clang/3.1/cpuid.h
OLD_FILES+=usr/include/clang/3.1/emmintrin.h
OLD_FILES+=usr/include/clang/3.1/fma4intrin.h
OLD_FILES+=usr/include/clang/3.1/immintrin.h
OLD_FILES+=usr/include/clang/3.1/lzcntintrin.h
OLD_FILES+=usr/include/clang/3.1/mm3dnow.h
OLD_FILES+=usr/include/clang/3.1/mm_malloc.h
OLD_FILES+=usr/include/clang/3.1/mmintrin.h
OLD_FILES+=usr/include/clang/3.1/module.map
OLD_FILES+=usr/include/clang/3.1/nmmintrin.h
OLD_FILES+=usr/include/clang/3.1/pmmintrin.h
OLD_FILES+=usr/include/clang/3.1/popcntintrin.h
OLD_FILES+=usr/include/clang/3.1/smmintrin.h
OLD_FILES+=usr/include/clang/3.1/tmmintrin.h
OLD_FILES+=usr/include/clang/3.1/unwind.h
OLD_FILES+=usr/include/clang/3.1/wmmintrin.h
OLD_FILES+=usr/include/clang/3.1/x86intrin.h
OLD_FILES+=usr/include/clang/3.1/xmmintrin.h
OLD_DIRS+=usr/include/clang/3.1
OLD_FILES+=usr/share/man/man1/llvm-ld.1.gz
# 20120712: OpenSSL 1.0.1c import
OLD_LIBS+=lib/libcrypto.so.6
OLD_LIBS+=usr/lib/libssl.so.6
OLD_LIBS+=usr/lib32/libcrypto.so.6
OLD_LIBS+=usr/lib32/libssl.so.6
OLD_FILES+=usr/include/openssl/aes_locl.h
OLD_FILES+=usr/include/openssl/bio_lcl.h
OLD_FILES+=usr/include/openssl/e_os.h
OLD_FILES+=usr/include/openssl/fips.h
OLD_FILES+=usr/include/openssl/fips_rand.h
OLD_FILES+=usr/include/openssl/md2.h
OLD_FILES+=usr/include/openssl/pq_compat.h
OLD_FILES+=usr/include/openssl/store.h
OLD_FILES+=usr/include/openssl/tmdiff.h
OLD_FILES+=usr/include/openssl/ui_locl.h
OLD_FILES+=usr/share/openssl/man/man3/CRYPTO_set_id_callback.3.gz
# 20120621: remove old man page
OLD_FILES+=usr/share/man/man8/vnconfig.8.gz
# 20120619: TOE support updated
OLD_FILES+=usr/include/netinet/toedev.h
# 20120613: auth.conf removed
OLD_FILES+=etc/auth.conf
OLD_FILES+=usr/share/examples/etc/auth.conf
OLD_FILES+=usr/share/man/man3/auth.3.gz
OLD_FILES+=usr/share/man/man3/auth_getval.3.gz
OLD_FILES+=usr/share/man/man5/auth.conf.5.gz
# 20120530: kde pam now lives in ports
OLD_FILES+=etc/pam.d/kde
# 20120521: byacc import
OLD_FILES+=usr/bin/yyfix
OLD_FILES+=usr/share/man/man1/yyfix.1.gz
# 20120505: new clang import installed a redundant internal header
OLD_FILES+=usr/include/clang/3.1/stdalign.h
# 20120428: MD2 removed from libmd
OLD_LIBS+=lib/libmd.so.5
OLD_FILES+=usr/include/md2.h
OLD_LIBS+=usr/lib32/libmd.so.5
OLD_FILES+=usr/share/man/man3/MD2Data.3.gz
OLD_FILES+=usr/share/man/man3/MD2End.3.gz
OLD_FILES+=usr/share/man/man3/MD2File.3.gz
OLD_FILES+=usr/share/man/man3/MD2FileChunk.3.gz
OLD_FILES+=usr/share/man/man3/MD2Final.3.gz
OLD_FILES+=usr/share/man/man3/MD2Init.3.gz
OLD_FILES+=usr/share/man/man3/MD2Update.3.gz
OLD_FILES+=usr/share/man/man3/md2.3.gz
# 20120425: libusb version bump (r234684)
OLD_LIBS+=usr/lib/libusb.so.2
OLD_LIBS+=usr/lib32/libusb.so.2
OLD_FILES+=usr/share/man/man3/libsub_get_active_config_descriptor.3.gz
# 20120415: new clang import which bumps version from 3.0 to 3.1
OLD_FILES+=usr/include/clang/3.0/altivec.h
OLD_FILES+=usr/include/clang/3.0/avxintrin.h
OLD_FILES+=usr/include/clang/3.0/emmintrin.h
OLD_FILES+=usr/include/clang/3.0/immintrin.h
OLD_FILES+=usr/include/clang/3.0/mm3dnow.h
OLD_FILES+=usr/include/clang/3.0/mm_malloc.h
OLD_FILES+=usr/include/clang/3.0/mmintrin.h
OLD_FILES+=usr/include/clang/3.0/nmmintrin.h
OLD_FILES+=usr/include/clang/3.0/pmmintrin.h
OLD_FILES+=usr/include/clang/3.0/smmintrin.h
OLD_FILES+=usr/include/clang/3.0/tmmintrin.h
OLD_FILES+=usr/include/clang/3.0/wmmintrin.h
OLD_FILES+=usr/include/clang/3.0/x86intrin.h
OLD_FILES+=usr/include/clang/3.0/xmmintrin.h
OLD_DIRS+=usr/include/clang/3.0
# 20120412: BIND 9.8.1 release notes removed
OLD_FILES+=usr/share/doc/bind9/RELEASE-NOTES-BIND-9.8.1.pdf
OLD_FILES+=usr/share/doc/bind9/RELEASE-NOTES-BIND-9.8.1.txt
OLD_FILES+=usr/share/doc/bind9/RELEASE-NOTES-BIND-9.8.1.html
OLD_FILES+=usr/share/doc/bind9/release-notes.css
# 20120330: legacy(4) moved to x86
OLD_FILES+=usr/include/machine/legacyvar.h
# 20120324: MPI headers updated
OLD_FILES+=usr/include/dev/mpt/mpilib/mpi_inb.h
# 20120322: hwpmc_mips24k.h removed
OLD_FILES+=usr/include/dev/hwpmc/hwpmc_mips24k.h
# 20120322: Update heimdal to 1.5.1.
OLD_FILES+=usr/include/krb5-v4compat.h \
usr/include/krb_err.h \
usr/include/hdb-private.h \
usr/share/man/man3/krb5_addresses.3.gz \
usr/share/man/man3/krb5_cc_cursor.3.gz \
usr/share/man/man3/krb5_cc_ops.3.gz \
usr/share/man/man3/krb5_config.3.gz \
usr/share/man/man3/krb5_config_get_int_default.3.gz \
usr/share/man/man3/krb5_context.3.gz \
usr/share/man/man3/krb5_data.3.gz \
usr/share/man/man3/krb5_err.3.gz \
usr/share/man/man3/krb5_errx.3.gz \
usr/share/man/man3/krb5_keyblock.3.gz \
usr/share/man/man3/krb5_keytab_entry.3.gz \
usr/share/man/man3/krb5_kt_cursor.3.gz \
usr/share/man/man3/krb5_kt_ops.3.gz \
usr/share/man/man3/krb5_set_warn_dest.3.gz \
usr/share/man/man3/krb5_verr.3.gz \
usr/share/man/man3/krb5_verrx.3.gz \
usr/share/man/man3/krb5_vwarnx.3.gz \
usr/share/man/man3/krb5_warn.3.gz \
usr/share/man/man3/krb5_warnx.3.gz
OLD_LIBS+=usr/lib/libasn1.so.10 \
usr/lib/libhdb.so.10 \
usr/lib/libheimntlm.so.10 \
usr/lib/libhx509.so.10 \
usr/lib/libkadm5clnt.so.10 \
usr/lib/libkadm5srv.so.10 \
usr/lib/libkafs5.so.10 \
usr/lib/libkrb5.so.10 \
usr/lib/libroken.so.10 \
usr/lib32/libasn1.so.10 \
usr/lib32/libhdb.so.10 \
usr/lib32/libheimntlm.so.10 \
usr/lib32/libhx509.so.10 \
usr/lib32/libkadm5clnt.so.10 \
usr/lib32/libkadm5srv.so.10 \
usr/lib32/libkafs5.so.10 \
usr/lib32/libkrb5.so.10 \
usr/lib32/libroken.so.10
# 20120309: Remove fifofs header files.
OLD_FILES+=usr/include/fs/fifofs/fifo.h
OLD_DIRS+=usr/include/fs/fifofs
# 20120304: xlocale cleanup
OLD_FILES+=usr/include/_xlocale_ctype.h
# 20120225: libarchive 3.0.3
OLD_FILES+=usr/share/man/man3/archive_read_data_into_buffer.3.gz \
usr/share/man/man3/archive_read_support_compression_all.3.gz \
usr/share/man/man3/archive_read_support_compression_bzip2.3.gz \
usr/share/man/man3/archive_read_support_compression_compress.3.gz \
usr/share/man/man3/archive_read_support_compression_gzip.3.gz \
usr/share/man/man3/archive_read_support_compression_lzma.3.gz \
usr/share/man/man3/archive_read_support_compression_none.3.gz \
usr/share/man/man3/archive_read_support_compression_program.3.gz \
usr/share/man/man3/archive_read_support_compression_program_signature.3.gz \
usr/share/man/man3/archive_read_support_compression_xz.3.gz \
usr/share/man/man3/archive_write_set_callbacks.3.gz \
usr/share/man/man3/archive_write_set_compression_bzip2.3.gz \
usr/share/man/man3/archive_write_set_compression_compress.3.gz \
usr/share/man/man3/archive_write_set_compression_gzip.3.gz \
usr/share/man/man3/archive_write_set_compression_none.3.gz \
usr/share/man/man3/archive_write_set_compression_program.3.gz
OLD_LIBS+=usr/lib/libarchive.so.5
OLD_LIBS+=usr/lib32/libarchive.so.5
# 20120113: removal of wtmpcvt(1)
OLD_FILES+=usr/bin/wtmpcvt
OLD_FILES+=usr/share/man/man1/wtmpcvt.1.gz
# 20111214: eventtimers(7) moved to eventtimers(4)
OLD_FILES+=usr/share/man/man7/eventtimers.7.gz
# 20111125: amd(4) removed
OLD_FILES+=usr/share/man/man4/amd.4.gz
# 20111125: libodialog removed
OLD_FILES+=usr/lib/libodialog.a
OLD_FILES+=usr/lib/libodialog.so
OLD_LIBS+=usr/lib/libodialog.so.7
OLD_FILES+=usr/lib/libodialog_p.a
OLD_FILES+=usr/lib32/libodialog.a
OLD_FILES+=usr/lib32/libodialog.so
OLD_LIBS+=usr/lib32/libodialog.so.7
OLD_FILES+=usr/lib32/libodialog_p.a
# 20110930: sysinstall removed
OLD_FILES+=usr/sbin/sysinstall
OLD_FILES+=usr/share/man/man8/sysinstall.8.gz
OLD_FILES+=usr/lib/libftpio.a
OLD_FILES+=usr/lib/libftpio.so
OLD_LIBS+=usr/lib/libftpio.so.8
OLD_FILES+=usr/lib/libftpio_p.a
OLD_FILES+=usr/lib32/libftpio.a
OLD_FILES+=usr/lib32/libftpio.so
OLD_LIBS+=usr/lib32/libftpio.so.8
OLD_FILES+=usr/lib32/libftpio_p.a
OLD_FILES+=usr/include/ftpio.h
OLD_FILES+=usr/share/man/man3/ftpio.3.gz
# 20110915: rename congestion control manpages
OLD_FILES+=usr/share/man/man4/cc.4.gz
OLD_FILES+=usr/share/man/man9/cc.9.gz
# 20110831: atomic page flags operations
OLD_FILES+=usr/share/man/man9/vm_page_flag.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_flag_clear.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_flag_set.9.gz
# 20110828: library version bump for 9.0
OLD_LIBS+=lib/libcam.so.5
OLD_LIBS+=lib/libpcap.so.7
OLD_LIBS+=lib/libufs.so.5
OLD_LIBS+=usr/lib/libbsnmp.so.5
OLD_LIBS+=usr/lib/libdwarf.so.2
OLD_LIBS+=usr/lib/libopie.so.6
OLD_LIBS+=usr/lib/librtld_db.so.1
OLD_LIBS+=usr/lib/libtacplus.so.4
OLD_LIBS+=usr/lib32/libcam.so.5
OLD_LIBS+=usr/lib32/libpcap.so.7
OLD_LIBS+=usr/lib32/libufs.so.5
OLD_LIBS+=usr/lib32/libbsnmp.so.5
OLD_LIBS+=usr/lib32/libdwarf.so.2
OLD_LIBS+=usr/lib32/libopie.so.6
OLD_LIBS+=usr/lib32/librtld_db.so.1
OLD_LIBS+=usr/lib32/libtacplus.so.4
# 20110817: no more acd.4, ad.4, afd.4 and ast.4
OLD_FILES+=usr/share/man/man4/acd.4.gz
OLD_FILES+=usr/share/man/man4/ad.4.gz
OLD_FILES+=usr/share/man/man4/afd.4.gz
OLD_FILES+=usr/share/man/man4/ast.4.gz
# 20110718: no longer useful in the age of rc.d
OLD_FILES+=usr/sbin/named.reconfig
OLD_FILES+=usr/sbin/named.reload
OLD_FILES+=usr/share/man/man8/named.reconfig.8.gz
OLD_FILES+=usr/share/man/man8/named.reload.8.gz
# 20110716: bind 9.8.0 import
OLD_LIBS+=usr/lib/liblwres.so.50
OLD_FILES+=usr/share/doc/bind9/KNOWN-DEFECTS
OLD_FILES+=usr/share/doc/bind9/NSEC3-NOTES
OLD_FILES+=usr/share/doc/bind9/README.idnkit
OLD_FILES+=usr/share/doc/bind9/README.pkcs11
# 20110709: vm_map_clean.9 -> vm_map_sync.9
OLD_FILES+=usr/share/man/man9/vm_map_clean.9.gz
# 20110709: Catch up with removal of these functions.
OLD_FILES+=usr/share/man/man9/vm_page_copy.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_protect.9.gz
OLD_FILES+=usr/share/man/man9/vm_page_zero_fill.9.gz
# 20110707: script no longer needed by /etc/rc.d/nfsd
OLD_FILES+=etc/rc.d/nfsserver
# 20110705: files moved so both NFS clients can share them
OLD_FILES+=usr/include/nfsclient/krpc.h
OLD_FILES+=usr/include/nfsclient/nfsdiskless.h
# 20110705: default NFS client switched to the new one
OLD_FILES+=sbin/mount_newnfs
OLD_FILES+=usr/share/man/man8/mount_newnfs.8.gz
OLD_FILES+=usr/include/nfsclient/nfs_kdtrace.h
# 20110628: calendar.msk removed
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.msk
# 20110517: libpkg removed
OLD_FILES+=usr/include/pkg.h
OLD_FILES+=usr/lib/libpkg.a
OLD_FILES+=usr/lib/libpkg.so
OLD_LIBS+=usr/lib/libpkg.so.0
OLD_FILES+=usr/lib/libpkg_p.a
OLD_FILES+=usr/lib32/libpkg.a
OLD_FILES+=usr/lib32/libpkg.so
OLD_LIBS+=usr/lib32/libpkg.so.0
OLD_FILES+=usr/lib32/libpkg_p.a
# 20110517: libsbuf version bump
OLD_LIBS+=lib/libsbuf.so.5
OLD_LIBS+=usr/lib32/libsbuf.so.5
# 20110502: new clang import which bumps version from 2.9 to 3.0
OLD_FILES+=usr/include/clang/2.9/emmintrin.h
OLD_FILES+=usr/include/clang/2.9/mm_malloc.h
OLD_FILES+=usr/include/clang/2.9/mmintrin.h
OLD_FILES+=usr/include/clang/2.9/pmmintrin.h
OLD_FILES+=usr/include/clang/2.9/tmmintrin.h
OLD_FILES+=usr/include/clang/2.9/xmmintrin.h
OLD_DIRS+=usr/include/clang/2.9
# 20110417: removal of Objective-C support
OLD_FILES+=usr/include/objc/encoding.h
OLD_FILES+=usr/include/objc/hash.h
OLD_FILES+=usr/include/objc/NXConstStr.h
OLD_FILES+=usr/include/objc/objc-api.h
OLD_FILES+=usr/include/objc/objc-decls.h
OLD_FILES+=usr/include/objc/objc-list.h
OLD_FILES+=usr/include/objc/objc.h
OLD_FILES+=usr/include/objc/Object.h
OLD_FILES+=usr/include/objc/Protocol.h
OLD_FILES+=usr/include/objc/runtime.h
OLD_FILES+=usr/include/objc/sarray.h
OLD_FILES+=usr/include/objc/thr.h
OLD_FILES+=usr/include/objc/typedstream.h
OLD_FILES+=usr/lib/libobjc.a
OLD_FILES+=usr/lib/libobjc.so
OLD_FILES+=usr/lib/libobjc_p.a
OLD_FILES+=usr/libexec/cc1obj
OLD_LIBS+=usr/lib/libobjc.so.4
OLD_DIRS+=usr/include/objc
OLD_FILES+=usr/lib32/libobjc.a
OLD_FILES+=usr/lib32/libobjc.so
OLD_FILES+=usr/lib32/libobjc_p.a
OLD_LIBS+=usr/lib32/libobjc.so.4
# 20110331: firmware.img created at build time
OLD_FILES+=usr/share/examples/kld/firmware/fwimage/firmware.img
# 20110224: sticky.8 -> sticky.7
OLD_FILES+=usr/share/man/man8/sticky.8.gz
# 20110220: new clang import which bumps version from 2.8 to 2.9
OLD_FILES+=usr/include/clang/2.8/emmintrin.h
OLD_FILES+=usr/include/clang/2.8/mm_malloc.h
OLD_FILES+=usr/include/clang/2.8/mmintrin.h
OLD_FILES+=usr/include/clang/2.8/pmmintrin.h
OLD_FILES+=usr/include/clang/2.8/tmmintrin.h
OLD_FILES+=usr/include/clang/2.8/xmmintrin.h
OLD_DIRS+=usr/include/clang/2.8
# 20110119: netinet/sctp_cc_functions.h removed
OLD_FILES+=usr/include/netinet/sctp_cc_functions.h
# 20110119: Remove SYSCTL_*X* sysctl additions.
OLD_FILES+=usr/share/man/man9/SYSCTL_XINT.9.gz \
usr/share/man/man9/SYSCTL_XLONG.9.gz
# 20110112: Update dialog to a new version, rename the old libdialog to
# libodialog, and remove associated man pages and header files.
OLD_FILES+=usr/share/man/man3/draw_shadow.3.gz \
usr/share/man/man3/draw_box.3.gz usr/share/man/man3/line_edit.3.gz \
usr/share/man/man3/strheight.3.gz usr/share/man/man3/strwidth.3.gz \
usr/share/man/man3/dialog_create_rc.3.gz \
usr/share/man/man3/dialog_yesno.3.gz usr/share/man/man3/dialog_noyes.3.gz \
usr/share/man/man3/dialog_prgbox.3.gz \
usr/share/man/man3/dialog_textbox.3.gz usr/share/man/man3/dialog_menu.3.gz \
usr/share/man/man3/dialog_checklist.3.gz \
usr/share/man/man3/dialog_radiolist.3.gz \
usr/share/man/man3/dialog_inputbox.3.gz \
usr/share/man/man3/dialog_clear_norefresh.3.gz \
usr/share/man/man3/dialog_clear.3.gz usr/share/man/man3/dialog_update.3.gz \
usr/share/man/man3/dialog_fselect.3.gz \
usr/share/man/man3/dialog_notify.3.gz \
usr/share/man/man3/dialog_mesgbox.3.gz \
usr/share/man/man3/dialog_gauge.3.gz usr/share/man/man3/init_dialog.3.gz \
usr/share/man/man3/end_dialog.3.gz usr/share/man/man3/use_helpfile.3.gz \
usr/share/man/man3/use_helpline.3.gz usr/share/man/man3/get_helpline.3.gz \
usr/share/man/man3/restore_helpline.3.gz \
usr/share/man/man3/dialog_msgbox.3.gz \
usr/share/man/man3/dialog_ftree.3.gz usr/share/man/man3/dialog_tree.3.gz \
usr/share/examples/dialog/README usr/share/examples/dialog/checklist \
usr/share/examples/dialog/ftreebox usr/share/examples/dialog/infobox \
usr/share/examples/dialog/inputbox usr/share/examples/dialog/menubox \
usr/share/examples/dialog/msgbox usr/share/examples/dialog/prgbox \
usr/share/examples/dialog/radiolist usr/share/examples/dialog/textbox \
usr/share/examples/dialog/treebox usr/share/examples/dialog/yesno \
usr/share/examples/libdialog/Makefile usr/share/examples/libdialog/check1.c\
usr/share/examples/libdialog/check2.c usr/share/examples/libdialog/check3.c\
usr/share/examples/libdialog/dselect.c \
usr/share/examples/libdialog/fselect.c \
usr/share/examples/libdialog/ftree1.c \
usr/share/examples/libdialog/ftree1.test \
usr/share/examples/libdialog/ftree2.c \
usr/share/examples/libdialog/ftree2.test \
usr/share/examples/libdialog/gauge.c usr/share/examples/libdialog/input1.c \
usr/share/examples/libdialog/input2.c usr/share/examples/libdialog/menu1.c \
usr/share/examples/libdialog/menu2.c usr/share/examples/libdialog/menu3.c \
usr/share/examples/libdialog/msg.c usr/share/examples/libdialog/prgbox.c \
usr/share/examples/libdialog/radio1.c usr/share/examples/libdialog/radio2.c\
usr/share/examples/libdialog/radio3.c usr/share/examples/libdialog/text.c \
usr/share/examples/libdialog/tree.c usr/share/examples/libdialog/yesno.c
OLD_DIRS+=usr/share/examples/libdialog usr/share/examples/dialog
# 20101208: libbsnmp was moved to usr/lib
OLD_LIBS+=lib/libbsnmp.so.5
# 20101114: Remove long-obsolete MAKEDEV.8
OLD_FILES+=usr/share/man/man8/MAKEDEV.8.gz
# 20101112: vgonel(9) moved to the private API a while ago
OLD_FILES+=usr/share/man/man9/vgonel.9.gz
# 20101112: removed gasp.info
OLD_FILES+=usr/share/info/gasp.info.gz
# 20101109: machine/mutex.h removed
OLD_FILES+=usr/include/machine/mutex.h
# 20101109: headers moved from machine/ to x86/
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/include/machine/mptable.h
.endif
# 20101101: headers moved from machine/ to x86/
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/include/machine/apicreg.h
OLD_FILES+=usr/include/machine/mca.h
.endif
# 20101020: catch up with vm_page_sleep_if_busy rename
OLD_FILES+=usr/share/man/man9/vm_page_sleep_busy.9.gz
# 20101018: taskqueue(9) updates
OLD_FILES+=usr/share/man/man9/taskqueue_find.9.gz
# 20101011: removed subblock.h from liblzma
OLD_FILES+=usr/include/lzma/subblock.h
# 20101002: removed manpath.config
OLD_FILES+=etc/manpath.config
OLD_FILES+=usr/share/examples/etc/manpath.config
# 20100910: renamed sbuf_overflowed to sbuf_error
OLD_FILES+=usr/share/man/man9/sbuf_overflowed.9.gz
# 20100815: retired last traces of chooseproc(9)
OLD_FILES+=usr/share/man/man9/chooseproc.9.gz
# 20100806: removal of unused libcompat routines
OLD_FILES+=usr/share/man/man3/ascftime.3.gz
OLD_FILES+=usr/share/man/man3/cfree.3.gz
OLD_FILES+=usr/share/man/man3/cftime.3.gz
OLD_FILES+=usr/share/man/man3/getpw.3.gz
# 20100801: tzdata2010k import
OLD_FILES+=usr/share/zoneinfo/Pacific/Ponape
OLD_FILES+=usr/share/zoneinfo/Pacific/Truk
# 20100725: acpi_aiboost(4) removal.
OLD_FILES+=usr/share/man/man4/acpi_aiboost.4.gz
# 20100724: nfsclient/nfs_lock.h moved to nfs/nfs_lock.h
OLD_FILES+=usr/include/nfsclient/nfs_lock.h
# 20100720: new clang import which bumps version from 2.0 to 2.8
OLD_FILES+=usr/include/clang/2.0/emmintrin.h
OLD_FILES+=usr/include/clang/2.0/mm_malloc.h
OLD_FILES+=usr/include/clang/2.0/mmintrin.h
OLD_FILES+=usr/include/clang/2.0/pmmintrin.h
OLD_FILES+=usr/include/clang/2.0/tmmintrin.h
OLD_FILES+=usr/include/clang/2.0/xmmintrin.h
OLD_DIRS+=usr/include/clang/2.0
# 20100706: removed pc-sysinstall's detect-vmware.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/detect-vmware.sh
# 20100701: [powerpc] removed <machine/intr.h>
.if ${TARGET_ARCH} == "powerpc"
OLD_FILES+=usr/include/machine/intr.h
.endif
# 20100514: library version bump for versioned symbols for liblzma
OLD_LIBS+=usr/lib/liblzma.so.0
OLD_LIBS+=usr/lib32/liblzma.so.0
# 20100511: move GCC-specific headers to /usr/include/gcc
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/include/emmintrin.h
OLD_FILES+=usr/include/mm_malloc.h
OLD_FILES+=usr/include/pmmintrin.h
OLD_FILES+=usr/include/xmmintrin.h
.endif
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" || ${TARGET_ARCH} == "arm"
OLD_FILES+=usr/include/mmintrin.h
.endif
.if ${TARGET_ARCH} == "ia64"
OLD_FILES+=usr/include/ia64intrin.h
.endif
.if ${TARGET_ARCH} == "powerpc"
OLD_FILES+=usr/include/altivec.h
OLD_FILES+=usr/include/ppc-asm.h
OLD_FILES+=usr/include/spe.h
.endif
# 20100416: [mips] removed <machine/psl.h>
.if ${TARGET_ARCH} == "mips"
OLD_FILES+=usr/include/machine/psl.h
.endif
# 20100415: [mips] removed unused headers
.if ${TARGET_ARCH} == "mips"
OLD_FILES+=usr/include/machine/archtype.h
OLD_FILES+=usr/include/machine/segments.h
OLD_FILES+=usr/include/machine/rm7000.h
OLD_FILES+=usr/include/machine/defs.h
OLD_FILES+=usr/include/machine/queue.h
.endif
# 20100326: [ia64] removed <machine/nexusvar.h>
.if ${TARGET_ARCH} == "ia64"
OLD_FILES+=usr/include/machine/nexusvar.h
.endif
# 20100326: gcpio removal
OLD_FILES+=usr/bin/gcpio
OLD_FILES+=usr/share/info/cpio.info.gz
OLD_FILES+=usr/share/man/man1/gcpio.1.gz
# 20100322: libz update
OLD_LIBS+=lib/libz.so.5
OLD_LIBS+=usr/lib32/libz.so.5
# 20100314: removal of regexp.h
OLD_FILES+=usr/include/regexp.h
OLD_FILES+=usr/share/man/man3/regexp.3.gz
OLD_FILES+=usr/share/man/man3/regsub.3.gz
# 20100303: actual removal of utmp.h
OLD_FILES+=usr/include/utmp.h
# 20100227: [ia64] removed <machine/sapicreg.h> and <machine/sapicvar.h>
.if ${TARGET_ARCH} == "ia64"
OLD_FILES+=usr/include/machine/sapicreg.h
OLD_FILES+=usr/include/machine/sapicvar.h
.endif
# 20100208: man pages moved
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/share/man/man4/i386/alpm.4.gz
OLD_FILES+=usr/share/man/man4/i386/amdpm.4.gz
OLD_FILES+=usr/share/man/man4/i386/mcd.4.gz
OLD_FILES+=usr/share/man/man4/i386/padlock.4.gz
OLD_FILES+=usr/share/man/man4/i386/pcf.4.gz
OLD_FILES+=usr/share/man/man4/i386/scd.4.gz
OLD_FILES+=usr/share/man/man4/i386/viapm.4.gz
.endif
# 20100122: move BSDL bc/dc USD documents to /usr/share/doc/usd
OLD_FILES+=usr/share/doc/papers/bc.ascii.gz
OLD_FILES+=usr/share/doc/papers/dc.ascii.gz
# 20100120: replacing GNU bc/dc with BSDL versions
OLD_FILES+=usr/share/examples/bc/ckbook.b
OLD_FILES+=usr/share/examples/bc/pi.b
OLD_FILES+=usr/share/examples/bc/primes.b
OLD_FILES+=usr/share/examples/bc/twins.b
OLD_FILES+=usr/share/info/dc.info.gz
OLD_DIRS+=usr/share/examples/bc
# 20100114: removal of ttyslot(3)
OLD_FILES+=usr/share/man/man3/ttyslot.3.gz
# 20100113: remove utmp.h, replace it by utmpx.h
OLD_FILES+=usr/share/man/man3/login.3.gz
OLD_FILES+=usr/share/man/man3/logout.3.gz
OLD_FILES+=usr/share/man/man3/logwtmp.3.gz
OLD_FILES+=usr/share/man/man3/ulog_endutxent.3.gz
OLD_FILES+=usr/share/man/man3/ulog_getutxent.3.gz
OLD_FILES+=usr/share/man/man3/ulog_getutxline.3.gz
OLD_FILES+=usr/share/man/man3/ulog_getutxuser.3.gz
OLD_FILES+=usr/share/man/man3/ulog_pututxline.3.gz
OLD_FILES+=usr/share/man/man3/ulog_setutxent.3.gz
OLD_FILES+=usr/share/man/man3/ulog_setutxfile.3.gz
OLD_FILES+=usr/share/man/man5/lastlog.5.gz
OLD_FILES+=usr/share/man/man5/utmp.5.gz
OLD_FILES+=usr/share/man/man5/wtmp.5.gz
OLD_LIBS+=lib/libutil.so.8
OLD_LIBS+=usr/lib32/libutil.so.8
# 20100105: new userland semaphore implementation
OLD_FILES+=usr/include/sys/semaphore.h
# 20100103: ntptrace(8) removed
OLD_FILES+=usr/sbin/ntptrace
OLD_FILES+=usr/share/man/man8/ntptrace.8.gz
# 20091229: remove no longer relevant examples
OLD_FILES+=usr/share/examples/pppd/auth-down.sample
OLD_FILES+=usr/share/examples/pppd/auth-up.sample
OLD_FILES+=usr/share/examples/pppd/chap-secrets.sample
OLD_FILES+=usr/share/examples/pppd/chat.sh.sample
OLD_FILES+=usr/share/examples/pppd/ip-down.sample
OLD_FILES+=usr/share/examples/pppd/ip-up.sample
OLD_FILES+=usr/share/examples/pppd/options.sample
OLD_FILES+=usr/share/examples/pppd/pap-secrets.sample
OLD_FILES+=usr/share/examples/pppd/ppp.deny.sample
OLD_FILES+=usr/share/examples/pppd/ppp.shells.sample
OLD_DIRS+=usr/share/examples/pppd
OLD_FILES+=usr/share/examples/slattach/unit-command.sh
OLD_DIRS+=usr/share/examples/slattach
OLD_FILES+=usr/share/examples/sliplogin/slip.hosts
OLD_FILES+=usr/share/examples/sliplogin/slip.login
OLD_FILES+=usr/share/examples/sliplogin/slip.logout
OLD_FILES+=usr/share/examples/sliplogin/slip.slparms
OLD_DIRS+=usr/share/examples/sliplogin
OLD_FILES+=usr/share/examples/startslip/sldown.sh
OLD_FILES+=usr/share/examples/startslip/slip.sh
OLD_FILES+=usr/share/examples/startslip/slup.sh
OLD_DIRS+=usr/share/examples/startslip
# 20091202: unify rc.firewall and rc.firewall6.
OLD_FILES+=etc/rc.d/ip6fw
OLD_FILES+=etc/rc.firewall6
OLD_FILES+=usr/share/examples/etc/rc.firewall6
# 20091117: removal of rc.early(8) link
OLD_FILES+=usr/share/man/man8/rc.early.8.gz
# 20091117: usr/share/zoneinfo/GMT link removed
OLD_FILES+=usr/share/zoneinfo/GMT
# 20091027: pselect.3 implemented as syscall
OLD_FILES+=usr/share/man/man3/pselect.3.gz
# 20091005: fusword.9 and susword.9 removed
OLD_FILES+=usr/share/man/man9/fusword.9.gz
OLD_FILES+=usr/share/man/man9/susword.9.gz
# 20090909: vesa and dpms promoted to be i386/amd64 common
OLD_FILES+=usr/include/machine/pc/vesa.h
OLD_FILES+=usr/share/man/man4/i386/dpms.4.gz
# 20090904: remove lukemftpd
OLD_FILES+=usr/libexec/lukemftpd
OLD_FILES+=usr/share/man/man5/ftpd.conf.5.gz
OLD_FILES+=usr/share/man/man5/ftpusers.5.gz
OLD_FILES+=usr/share/man/man8/lukemftpd.8.gz
# 20090902: BSD.{x11,x11-4}.dist are dead and BSD.local.dist lives in ports/
OLD_FILES+=etc/mtree/BSD.local.dist
OLD_FILES+=etc/mtree/BSD.x11.dist
OLD_FILES+=etc/mtree/BSD.x11-4.dist
# 20090812: net80211 documentation overhaul
OLD_FILES+=usr/share/man/man9/ieee80211_add_rates.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_add_xrates.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_alloc_node.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_attach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_begin_scan.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_cfgget.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_cfgset.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_chan2ieee.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_chan2mode.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_create_ibss.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_crypto_attach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_crypto_detach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_decap.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_dump_pkt.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_dup_bss.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_encap.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_end_scan.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_find_node.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_fix_rate.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_free_allnodes.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_ieee2mhz.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_ioctl.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_lookup_node.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_media2rate.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_media_change.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_media_init.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_media_status.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_mhz2ieee.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_next_scan.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_node_attach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_node_detach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_node_lateattach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_print_essid.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_proto_attach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_proto_detach.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_rate2media.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_recv_mgmt.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_send_mgmt.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_setmode.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_timeout_nodes.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_watchdog.9.gz
OLD_FILES+=usr/share/man/man9/ieee80211_wep_crypt.9.gz
# 20090801: vimage.h removed in favour of vnet.h
OLD_FILES+=usr/include/sys/vimage.h
# 20101208: libbsnmp was moved to usr/lib
OLD_LIBS+=lib/libbsnmp.so.5
# 20090719: library version bump for 8.0
OLD_LIBS+=lib/libalias.so.6
OLD_LIBS+=lib/libavl.so.1
OLD_LIBS+=lib/libbegemot.so.3
OLD_LIBS+=lib/libbsdxml.so.3
OLD_LIBS+=lib/libbsnmp.so.4
OLD_LIBS+=lib/libcam.so.4
OLD_LIBS+=lib/libcrypt.so.4
OLD_LIBS+=lib/libcrypto.so.5
OLD_LIBS+=lib/libctf.so.1
OLD_LIBS+=lib/libdevstat.so.6
OLD_LIBS+=lib/libdtrace.so.1
OLD_LIBS+=lib/libedit.so.6
OLD_LIBS+=lib/libgeom.so.4
OLD_LIBS+=lib/libipsec.so.3
OLD_LIBS+=lib/libipx.so.4
OLD_LIBS+=lib/libkiconv.so.3
OLD_LIBS+=lib/libkvm.so.4
OLD_LIBS+=lib/libmd.so.4
OLD_LIBS+=lib/libncurses.so.7
OLD_LIBS+=lib/libncursesw.so.7
OLD_LIBS+=lib/libnvpair.so.1
OLD_LIBS+=lib/libpcap.so.6
OLD_LIBS+=lib/libreadline.so.7
OLD_LIBS+=lib/libsbuf.so.4
OLD_LIBS+=lib/libufs.so.4
OLD_LIBS+=lib/libumem.so.1
OLD_LIBS+=lib/libutil.so.7
OLD_LIBS+=lib/libuutil.so.1
OLD_LIBS+=lib/libz.so.4
OLD_LIBS+=lib/libzfs.so.1
OLD_LIBS+=lib/libzpool.so.1
OLD_LIBS+=usr/lib/libarchive.so.4
OLD_LIBS+=usr/lib/libauditd.so.4
OLD_LIBS+=usr/lib/libbluetooth.so.3
OLD_LIBS+=usr/lib/libbsm.so.2
OLD_LIBS+=usr/lib/libbz2.so.3
OLD_LIBS+=usr/lib/libcalendar.so.4
OLD_LIBS+=usr/lib/libcom_err.so.4
OLD_LIBS+=usr/lib/libdevinfo.so.4
OLD_LIBS+=usr/lib/libdialog.so.6
OLD_LIBS+=usr/lib/libdwarf.so.1
OLD_LIBS+=usr/lib/libfetch.so.5
OLD_LIBS+=usr/lib/libform.so.4
OLD_LIBS+=usr/lib/libformw.so.4
OLD_LIBS+=usr/lib/libftpio.so.7
OLD_LIBS+=usr/lib/libgnuregex.so.4
OLD_LIBS+=usr/lib/libgpib.so.2
OLD_LIBS+=usr/lib/libhistory.so.7
OLD_LIBS+=usr/lib/libmagic.so.3
OLD_LIBS+=usr/lib/libmemstat.so.2
OLD_LIBS+=usr/lib/libmenu.so.4
OLD_LIBS+=usr/lib/libmenuw.so.4
OLD_LIBS+=usr/lib/libmilter.so.4
OLD_LIBS+=usr/lib/libncp.so.3
OLD_LIBS+=usr/lib/libnetgraph.so.3
OLD_LIBS+=usr/lib/libngatm.so.3
OLD_LIBS+=usr/lib/libobjc.so.3
OLD_LIBS+=usr/lib/libopie.so.5
OLD_LIBS+=usr/lib/libpam.so.4
OLD_LIBS+=usr/lib/libpanel.so.4
OLD_LIBS+=usr/lib/libpanelw.so.4
OLD_LIBS+=usr/lib/libpmc.so.4
OLD_LIBS+=usr/lib/libproc.so.1
OLD_LIBS+=usr/lib/libradius.so.3
OLD_LIBS+=usr/lib/librpcsvc.so.4
OLD_LIBS+=usr/lib/libsdp.so.3
OLD_LIBS+=usr/lib/libsmb.so.3
OLD_LIBS+=usr/lib/libssh.so.4
OLD_LIBS+=usr/lib/libssl.so.5
OLD_LIBS+=usr/lib/libtacplus.so.3
OLD_LIBS+=usr/lib/libugidfw.so.3
OLD_LIBS+=usr/lib/libusb.so.1
OLD_LIBS+=usr/lib/libusbhid.so.3
OLD_LIBS+=usr/lib/libvgl.so.5
OLD_LIBS+=usr/lib/libwrap.so.5
OLD_LIBS+=usr/lib/libypclnt.so.3
OLD_LIBS+=usr/lib/pam_chroot.so.4
OLD_LIBS+=usr/lib/pam_deny.so.4
OLD_LIBS+=usr/lib/pam_echo.so.4
OLD_LIBS+=usr/lib/pam_exec.so.4
OLD_LIBS+=usr/lib/pam_ftpusers.so.4
OLD_LIBS+=usr/lib/pam_group.so.4
OLD_LIBS+=usr/lib/pam_guest.so.4
OLD_LIBS+=usr/lib/pam_krb5.so.4
OLD_LIBS+=usr/lib/pam_ksu.so.4
OLD_LIBS+=usr/lib/pam_lastlog.so.4
OLD_LIBS+=usr/lib/pam_login_access.so.4
OLD_LIBS+=usr/lib/pam_nologin.so.4
OLD_LIBS+=usr/lib/pam_opie.so.4
OLD_LIBS+=usr/lib/pam_opieaccess.so.4
OLD_LIBS+=usr/lib/pam_passwdqc.so.4
OLD_LIBS+=usr/lib/pam_permit.so.4
OLD_LIBS+=usr/lib/pam_radius.so.4
OLD_LIBS+=usr/lib/pam_rhosts.so.4
OLD_LIBS+=usr/lib/pam_rootok.so.4
OLD_LIBS+=usr/lib/pam_securetty.so.4
OLD_LIBS+=usr/lib/pam_self.so.4
OLD_LIBS+=usr/lib/pam_ssh.so.4
OLD_LIBS+=usr/lib/pam_tacplus.so.4
OLD_LIBS+=usr/lib/pam_unix.so.4
OLD_LIBS+=usr/lib/snmp_atm.so.5
OLD_LIBS+=usr/lib/snmp_bridge.so.5
OLD_LIBS+=usr/lib/snmp_hostres.so.5
OLD_LIBS+=usr/lib/snmp_mibII.so.5
OLD_LIBS+=usr/lib/snmp_netgraph.so.5
OLD_LIBS+=usr/lib/snmp_pf.so.5
OLD_LIBS+=usr/lib32/libalias.so.6
OLD_LIBS+=usr/lib32/libarchive.so.4
OLD_LIBS+=usr/lib32/libauditd.so.4
OLD_LIBS+=usr/lib32/libavl.so.1
OLD_LIBS+=usr/lib32/libbegemot.so.3
OLD_LIBS+=usr/lib32/libbluetooth.so.3
OLD_LIBS+=usr/lib32/libbsdxml.so.3
OLD_LIBS+=usr/lib32/libbsm.so.2
OLD_LIBS+=usr/lib32/libbsnmp.so.4
OLD_LIBS+=usr/lib32/libbz2.so.3
OLD_LIBS+=usr/lib32/libcalendar.so.4
OLD_LIBS+=usr/lib32/libcam.so.4
OLD_LIBS+=usr/lib32/libcom_err.so.4
OLD_LIBS+=usr/lib32/libcrypt.so.4
OLD_LIBS+=usr/lib32/libcrypto.so.5
OLD_LIBS+=usr/lib32/libctf.so.1
OLD_LIBS+=usr/lib32/libdevinfo.so.4
OLD_LIBS+=usr/lib32/libdevstat.so.6
OLD_LIBS+=usr/lib32/libdialog.so.6
OLD_LIBS+=usr/lib32/libdtrace.so.1
OLD_LIBS+=usr/lib32/libdwarf.so.1
OLD_LIBS+=usr/lib32/libedit.so.6
OLD_LIBS+=usr/lib32/libfetch.so.5
OLD_LIBS+=usr/lib32/libform.so.4
OLD_LIBS+=usr/lib32/libformw.so.4
OLD_LIBS+=usr/lib32/libftpio.so.7
OLD_LIBS+=usr/lib32/libgeom.so.4
OLD_LIBS+=usr/lib32/libgnuregex.so.4
OLD_LIBS+=usr/lib32/libgpib.so.2
OLD_LIBS+=usr/lib32/libhistory.so.7
OLD_LIBS+=usr/lib32/libipsec.so.3
OLD_LIBS+=usr/lib32/libipx.so.4
OLD_LIBS+=usr/lib32/libkiconv.so.3
OLD_LIBS+=usr/lib32/libkvm.so.4
OLD_LIBS+=usr/lib32/libmagic.so.3
OLD_LIBS+=usr/lib32/libmd.so.4
OLD_LIBS+=usr/lib32/libmemstat.so.2
OLD_LIBS+=usr/lib32/libmenu.so.4
OLD_LIBS+=usr/lib32/libmenuw.so.4
OLD_LIBS+=usr/lib32/libmilter.so.4
OLD_LIBS+=usr/lib32/libncp.so.3
OLD_LIBS+=usr/lib32/libncurses.so.7
OLD_LIBS+=usr/lib32/libncursesw.so.7
OLD_LIBS+=usr/lib32/libnetgraph.so.3
OLD_LIBS+=usr/lib32/libngatm.so.3
OLD_LIBS+=usr/lib32/libnvpair.so.1
OLD_LIBS+=usr/lib32/libobjc.so.3
OLD_LIBS+=usr/lib32/libopie.so.5
OLD_LIBS+=usr/lib32/libpam.so.4
OLD_LIBS+=usr/lib32/libpanel.so.4
OLD_LIBS+=usr/lib32/libpanelw.so.4
OLD_LIBS+=usr/lib32/libpcap.so.6
OLD_LIBS+=usr/lib32/libpmc.so.4
OLD_LIBS+=usr/lib32/libproc.so.1
OLD_LIBS+=usr/lib32/libradius.so.3
OLD_LIBS+=usr/lib32/libreadline.so.7
OLD_LIBS+=usr/lib32/librpcsvc.so.4
OLD_LIBS+=usr/lib32/libsbuf.so.4
OLD_LIBS+=usr/lib32/libsdp.so.3
OLD_LIBS+=usr/lib32/libsmb.so.3
OLD_LIBS+=usr/lib32/libssh.so.4
OLD_LIBS+=usr/lib32/libssl.so.5
OLD_LIBS+=usr/lib32/libtacplus.so.3
OLD_LIBS+=usr/lib32/libufs.so.4
OLD_LIBS+=usr/lib32/libugidfw.so.3
OLD_LIBS+=usr/lib32/libumem.so.1
OLD_LIBS+=usr/lib32/libusb.so.1
OLD_LIBS+=usr/lib32/libusbhid.so.3
OLD_LIBS+=usr/lib32/libutil.so.7
OLD_LIBS+=usr/lib32/libuutil.so.1
OLD_LIBS+=usr/lib32/libvgl.so.5
OLD_LIBS+=usr/lib32/libwrap.so.5
OLD_LIBS+=usr/lib32/libypclnt.so.3
OLD_LIBS+=usr/lib32/libz.so.4
OLD_LIBS+=usr/lib32/libzfs.so.1
OLD_LIBS+=usr/lib32/libzpool.so.1
OLD_LIBS+=usr/lib32/pam_chroot.so.4
OLD_LIBS+=usr/lib32/pam_deny.so.4
OLD_LIBS+=usr/lib32/pam_echo.so.4
OLD_LIBS+=usr/lib32/pam_exec.so.4
OLD_LIBS+=usr/lib32/pam_ftpusers.so.4
OLD_LIBS+=usr/lib32/pam_group.so.4
OLD_LIBS+=usr/lib32/pam_guest.so.4
OLD_LIBS+=usr/lib32/pam_krb5.so.4
OLD_LIBS+=usr/lib32/pam_ksu.so.4
OLD_LIBS+=usr/lib32/pam_lastlog.so.4
OLD_LIBS+=usr/lib32/pam_login_access.so.4
OLD_LIBS+=usr/lib32/pam_nologin.so.4
OLD_LIBS+=usr/lib32/pam_opie.so.4
OLD_LIBS+=usr/lib32/pam_opieaccess.so.4
OLD_LIBS+=usr/lib32/pam_passwdqc.so.4
OLD_LIBS+=usr/lib32/pam_permit.so.4
OLD_LIBS+=usr/lib32/pam_radius.so.4
OLD_LIBS+=usr/lib32/pam_rhosts.so.4
OLD_LIBS+=usr/lib32/pam_rootok.so.4
OLD_LIBS+=usr/lib32/pam_securetty.so.4
OLD_LIBS+=usr/lib32/pam_self.so.4
OLD_LIBS+=usr/lib32/pam_ssh.so.4
OLD_LIBS+=usr/lib32/pam_tacplus.so.4
OLD_LIBS+=usr/lib32/pam_unix.so.4
# 20090718: the gdm pam.d file is no longer required.
OLD_FILES+=etc/pam.d/gdm
# 20090714: net_add_domain(9) renamed to domain_add(9)
OLD_FILES+=usr/share/man/man9/net_add_domain.9.gz
# 20090713: vimage container structs removed.
OLD_FILES+=usr/include/netinet/vinet.h
OLD_FILES+=usr/include/netinet6/vinet6.h
OLD_FILES+=usr/include/netipsec/vipsec.h
# 20090712: ieee80211.4 -> net80211.4
OLD_FILES+=usr/share/man/man4/ieee80211.4.gz
# 20090711: typo fixed, kproc_resume,.9 -> kproc_resume.9
OLD_FILES+=usr/share/man/man9/kproc_resume,.9.gz
# 20090709: msgctl.3 msgget.3 msgrcv.3 msgsnd.3 manual pages moved to section 2
OLD_FILES+=usr/share/man/man3/msgctl.3.gz
OLD_FILES+=usr/share/man/man3/msgget.3.gz
OLD_FILES+=usr/share/man/man3/msgrcv.3.gz
OLD_FILES+=usr/share/man/man3/msgsnd.3.gz
# 20090630: old kernel RPC implementation removal
OLD_FILES+=usr/include/nfs/rpcv2.h
# 20090624: update usbdi(9)
OLD_FILES+=usr/share/man/man9/usbd_abort_default_pipe.9.gz
OLD_FILES+=usr/share/man/man9/usbd_abort_pipe.9.gz
OLD_FILES+=usr/share/man/man9/usbd_alloc_buffer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_alloc_xfer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_clear_endpoint_stall.9.gz
OLD_FILES+=usr/share/man/man9/usbd_clear_endpoint_stall_async.9.gz
OLD_FILES+=usr/share/man/man9/usbd_clear_endpoint_toggle.9.gz
OLD_FILES+=usr/share/man/man9/usbd_close_pipe.9.gz
OLD_FILES+=usr/share/man/man9/usbd_device2interface_handle.9.gz
OLD_FILES+=usr/share/man/man9/usbd_do_request_async.9.gz
OLD_FILES+=usr/share/man/man9/usbd_do_request_flags_pipe.9.gz
OLD_FILES+=usr/share/man/man9/usbd_endpoint_count.9.gz
OLD_FILES+=usr/share/man/man9/usbd_find_edesc.9.gz
OLD_FILES+=usr/share/man/man9/usbd_find_idesc.9.gz
OLD_FILES+=usr/share/man/man9/usbd_free_buffer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_free_xfer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_buffer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_config.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_config_desc.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_config_desc_full.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_config_descriptor.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_device_descriptor.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_endpoint_descriptor.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_interface_altindex.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_interface_descriptor.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_no_alts.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_quirks.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_speed.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_string.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_string_desc.9.gz
OLD_FILES+=usr/share/man/man9/usbd_get_xfer_status.9.gz
OLD_FILES+=usr/share/man/man9/usbd_interface2device_handle.9.gz
OLD_FILES+=usr/share/man/man9/usbd_interface2endpoint_descriptor.9.gz
OLD_FILES+=usr/share/man/man9/usbd_interface_count.9.gz
OLD_FILES+=usr/share/man/man9/usbd_open_pipe.9.gz
OLD_FILES+=usr/share/man/man9/usbd_open_pipe_intr.9.gz
OLD_FILES+=usr/share/man/man9/usbd_pipe2device_handle.9.gz
OLD_FILES+=usr/share/man/man9/usbd_set_config_index.9.gz
OLD_FILES+=usr/share/man/man9/usbd_set_config_no.9.gz
OLD_FILES+=usr/share/man/man9/usbd_set_interface.9.gz
OLD_FILES+=usr/share/man/man9/usbd_setup_default_xfer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_setup_isoc_xfer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_setup_xfer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_sync_transfer.9.gz
OLD_FILES+=usr/share/man/man9/usbd_transfer.9.gz
OLD_FILES+=usr/share/man/man9/usb_find_desc.9.gz
# 20090623: number of headers needed for a usb driver reduced
OLD_FILES+=usr/include/dev/usb/usb_defs.h
OLD_FILES+=usr/include/dev/usb/usb_error.h
OLD_FILES+=usr/include/dev/usb/usb_handle_request.h
OLD_FILES+=usr/include/dev/usb/usb_hid.h
OLD_FILES+=usr/include/dev/usb/usb_lookup.h
OLD_FILES+=usr/include/dev/usb/usb_mfunc.h
OLD_FILES+=usr/include/dev/usb/usb_parse.h
OLD_FILES+=usr/include/dev/usb/usb_revision.h
# 20090609: devclass_add_driver is no longer public
OLD_FILES+=usr/share/man/man9/devclass_add_driver.9.gz
OLD_FILES+=usr/share/man/man9/devclass_delete_driver.9.gz
OLD_FILES+=usr/share/man/man9/devclass_find_driver.9.gz
# 20090605: removal of clists
OLD_FILES+=usr/include/sys/clist.h
# 20090602: removal of window(1)
OLD_FILES+=usr/bin/window
OLD_FILES+=usr/share/man/man1/window.1.gz
# 20090531: bind 9.6.1rc1 import
OLD_LIBS+=usr/lib/liblwres.so.30
# 20090530: removal of early.sh
OLD_FILES+=etc/rc.d/early.sh
# 20090527: renaming of S{LIST,TAILQ}_REMOVE_NEXT() to _REMOVE_AFTER()
OLD_FILES+=usr/share/man/man3/SLIST_REMOVE_NEXT.3.gz
OLD_FILES+=usr/share/man/man3/STAILQ_REMOVE_NEXT.3.gz
# 20090527: removal of legacy USB stack
OLD_FILES+=usr/include/legacy/dev/usb/dsbr100io.h
OLD_FILES+=usr/include/legacy/dev/usb/ehcireg.h
OLD_FILES+=usr/include/legacy/dev/usb/ehcivar.h
OLD_FILES+=usr/include/legacy/dev/usb/hid.h
OLD_FILES+=usr/include/legacy/dev/usb/if_urtwreg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_urtwvar.h
OLD_FILES+=usr/include/legacy/dev/usb/ohcireg.h
OLD_FILES+=usr/include/legacy/dev/usb/ohcivar.h
OLD_FILES+=usr/include/legacy/dev/usb/rio500_usb.h
OLD_FILES+=usr/include/legacy/dev/usb/rt2573_ucode.h
OLD_FILES+=usr/include/legacy/dev/usb/sl811hsreg.h
OLD_FILES+=usr/include/legacy/dev/usb/sl811hsvar.h
OLD_FILES+=usr/include/legacy/dev/usb/ubser.h
OLD_FILES+=usr/include/legacy/dev/usb/ucomvar.h
OLD_FILES+=usr/include/legacy/dev/usb/udbp.h
OLD_FILES+=usr/include/legacy/dev/usb/uftdireg.h
OLD_FILES+=usr/include/legacy/dev/usb/ugraphire_rdesc.h
OLD_FILES+=usr/include/legacy/dev/usb/uhcireg.h
OLD_FILES+=usr/include/legacy/dev/usb/uhcivar.h
OLD_FILES+=usr/include/legacy/dev/usb/usb.h
OLD_FILES+=usr/include/legacy/dev/usb/usb_mem.h
OLD_FILES+=usr/include/legacy/dev/usb/usb_port.h
OLD_FILES+=usr/include/legacy/dev/usb/usb_quirks.h
OLD_FILES+=usr/include/legacy/dev/usb/usbcdc.h
OLD_FILES+=usr/include/legacy/dev/usb/usbdi.h
OLD_FILES+=usr/include/legacy/dev/usb/usbdi_util.h
OLD_FILES+=usr/include/legacy/dev/usb/usbdivar.h
OLD_FILES+=usr/include/legacy/dev/usb/usbhid.h
OLD_FILES+=usr/include/legacy/dev/usb/uxb360gp_rdesc.h
OLD_DIRS+=usr/include/legacy/dev/usb
OLD_DIRS+=usr/include/legacy/dev
OLD_DIRS+=usr/include/legacy
# 20090526: removal of makekey(8)
OLD_FILES+=usr/libexec/makekey
OLD_FILES+=usr/share/man/man8/makekey.8.gz
# 20090522: removal of University of Michigan NFSv4 client
OLD_FILES+=etc/rc.d/idmapd
OLD_FILES+=sbin/idmapd
OLD_FILES+=sbin/mount_nfs4
OLD_FILES+=usr/share/man/man8/idmapd.8.gz
OLD_FILES+=usr/share/man/man8/mount_nfs4.8.gz
# 20090513: removal of legacy versions of USB network interface drivers
OLD_FILES+=usr/include/legacy/dev/usb/if_upgtvar.h
OLD_FILES+=usr/include/legacy/dev/usb/usb_ethersubr.h
# 20090417: removal of legacy versions of USB network interface drivers
OLD_FILES+=usr/include/legacy/dev/usb/if_auereg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_axereg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_cdcereg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_cuereg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_kuereg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_ruereg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_rumreg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_rumvar.h
OLD_FILES+=usr/include/legacy/dev/usb/if_udavreg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_uralreg.h
OLD_FILES+=usr/include/legacy/dev/usb/if_uralvar.h
OLD_FILES+=usr/include/legacy/dev/usb/if_zydfw.h
OLD_FILES+=usr/include/legacy/dev/usb/if_zydreg.h
OLD_FILES+=usr/include/legacy/dev/usb/kue_fw.h
# 20090416: removal of ar(4), ray(4), sr(4), raycontrol(8)
OLD_FILES+=usr/sbin/raycontrol
OLD_FILES+=usr/share/man/man4/i386/ar.4.gz
OLD_FILES+=usr/share/man/man4/i386/ray.4.gz
OLD_FILES+=usr/share/man/man4/i386/sr.4.gz
OLD_FILES+=usr/share/man/man8/raycontrol.8.gz
# 20090410: VOP_LEASE.9 removed
OLD_FILES+=usr/share/man/man9/VOP_LEASE.9.gz
# 20090406: usb_sw_transfer.h removed
OLD_FILES+=usr/include/dev/usb/usb_sw_transfer.h
# 20090405: removal of if_ppp(4) and if_sl(4)
OLD_FILES+=sbin/slattach
OLD_FILES+=rescue/slattach
OLD_FILES+=sbin/startslip
OLD_FILES+=rescue/startslip
OLD_FILES+=usr/include/net/if_ppp.h
OLD_FILES+=usr/include/net/if_pppvar.h
OLD_FILES+=usr/include/net/if_slvar.h
OLD_FILES+=usr/include/net/ppp_comp.h
OLD_FILES+=usr/include/net/slip.h
OLD_FILES+=usr/sbin/sliplogin
OLD_FILES+=usr/sbin/slstat
OLD_FILES+=usr/sbin/pppd
OLD_FILES+=usr/sbin/pppstats
OLD_FILES+=usr/share/man/man1/startslip.1.gz
OLD_FILES+=usr/share/man/man4/if_ppp.4.gz
OLD_FILES+=usr/share/man/man4/if_sl.4.gz
OLD_FILES+=usr/share/man/man4/ppp.4.gz
OLD_FILES+=usr/share/man/man4/sl.4.gz
OLD_FILES+=usr/share/man/man8/pppd.8.gz
OLD_FILES+=usr/share/man/man8/pppstats.8.gz
OLD_FILES+=usr/share/man/man8/slattach.8.gz
OLD_FILES+=usr/share/man/man8/slip.8.gz
OLD_FILES+=usr/share/man/man8/sliplogin.8.gz
OLD_FILES+=usr/share/man/man8/slstat.8.gz
# 20090321: libpcap upgraded to 1.0.0
OLD_LIBS+=lib/libpcap.so.5
OLD_LIBS+=usr/lib32/libpcap.so.5
# 20090319: uscanner(4) has been removed
OLD_FILES+=usr/share/man/man4/uscanner.4.gz
# 20090313: k8temp(4) renamed to amdtemp(4)
OLD_FILES+=usr/share/man/man4/k8temp.4.gz
# 20090308: libusb20 renamed to libusb
OLD_LIBS+=usr/lib/libusb20.so.1
OLD_FILES+=usr/lib/libusb20.a
OLD_FILES+=usr/lib/libusb20.so
OLD_FILES+=usr/lib/libusb20_p.a
OLD_FILES+=usr/include/libusb20_compat01.h
OLD_FILES+=usr/include/libusb20_compat10.h
OLD_LIBS+=usr/lib32/libusb20.so.1
OLD_FILES+=usr/lib32/libusb20.a
OLD_FILES+=usr/lib32/libusb20.so
OLD_FILES+=usr/lib32/libusb20_p.a
# 20090226: libmp(3) functions renamed
OLD_LIBS+=usr/lib/libmp.so.6
OLD_LIBS+=usr/lib32/libmp.so.6
# 20090223: changeover of USB stacks
OLD_FILES+=usr/include/dev/usb2/include/ufm2_ioctl.h
OLD_FILES+=usr/include/dev/usb2/include/urio2_ioctl.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_cdc.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_defs.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_devid.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_devtable.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_endian.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_error.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_hid.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_ioctl.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_mfunc.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_revision.h
OLD_FILES+=usr/include/dev/usb2/include/usb2_standard.h
OLD_DIRS+=usr/include/dev/usb2/include
OLD_DIRS+=usr/include/dev/usb2
OLD_FILES+=usr/include/dev/usb/dsbr100io.h
OLD_FILES+=usr/include/dev/usb/ehcireg.h
OLD_FILES+=usr/include/dev/usb/ehcivar.h
OLD_FILES+=usr/include/dev/usb/hid.h
OLD_FILES+=usr/include/dev/usb/if_auereg.h
OLD_FILES+=usr/include/dev/usb/if_axereg.h
OLD_FILES+=usr/include/dev/usb/if_cdcereg.h
OLD_FILES+=usr/include/dev/usb/if_cuereg.h
OLD_FILES+=usr/include/dev/usb/if_kuereg.h
OLD_FILES+=usr/include/dev/usb/if_ruereg.h
OLD_FILES+=usr/include/dev/usb/if_rumreg.h
OLD_FILES+=usr/include/dev/usb/if_rumvar.h
OLD_FILES+=usr/include/dev/usb/if_udavreg.h
OLD_FILES+=usr/include/dev/usb/if_upgtvar.h
OLD_FILES+=usr/include/dev/usb/if_uralreg.h
OLD_FILES+=usr/include/dev/usb/if_uralvar.h
OLD_FILES+=usr/include/dev/usb/if_urtwreg.h
OLD_FILES+=usr/include/dev/usb/if_urtwvar.h
OLD_FILES+=usr/include/dev/usb/if_zydfw.h
OLD_FILES+=usr/include/dev/usb/if_zydreg.h
OLD_FILES+=usr/include/dev/usb/kue_fw.h
OLD_FILES+=usr/include/dev/usb/ohcireg.h
OLD_FILES+=usr/include/dev/usb/ohcivar.h
OLD_FILES+=usr/include/dev/usb/rio500_usb.h
OLD_FILES+=usr/include/dev/usb/rt2573_ucode.h
OLD_FILES+=usr/include/dev/usb/sl811hsreg.h
OLD_FILES+=usr/include/dev/usb/sl811hsvar.h
OLD_FILES+=usr/include/dev/usb/ubser.h
OLD_FILES+=usr/include/dev/usb/ucomvar.h
OLD_FILES+=usr/include/dev/usb/udbp.h
OLD_FILES+=usr/include/dev/usb/uftdireg.h
OLD_FILES+=usr/include/dev/usb/ugraphire_rdesc.h
OLD_FILES+=usr/include/dev/usb/uhcireg.h
OLD_FILES+=usr/include/dev/usb/uhcivar.h
OLD_FILES+=usr/include/dev/usb/usb_ethersubr.h
OLD_FILES+=usr/include/dev/usb/usb_mem.h
OLD_FILES+=usr/include/dev/usb/usb_port.h
OLD_FILES+=usr/include/dev/usb/usb_quirks.h
OLD_FILES+=usr/include/dev/usb/usbcdc.h
OLD_FILES+=usr/include/dev/usb/usbdivar.h
OLD_FILES+=usr/include/dev/usb/uxb360gp_rdesc.h
OLD_FILES+=usr/sbin/usbdevs
OLD_FILES+=usr/share/man/man8/usbdevs.8.gz
# 20090203: removal of pccard header files
OLD_FILES+=usr/include/pccard/cardinfo.h
OLD_FILES+=usr/include/pccard/cis.h
OLD_DIRS+=usr/include/pccard
# 20090203: adding_user.8 moved to adding_user.7
OLD_FILES+=usr/share/man/man8/adding_user.8.gz
# 20090122: tzdata2009a import
OLD_FILES+=usr/share/zoneinfo/Asia/Katmandu
# 20090102: file 4.26 import
OLD_FILES+=usr/share/misc/magic.mime
OLD_FILES+=usr/share/misc/magic.mime.mgc
# 20081223: bind 9.4.3 import, nsupdate.8 moved to nsupdate.1
OLD_FILES+=usr/share/man/man8/nsupdate.8.gz
# 20081223: ipprotosw.h removed
OLD_FILES+=usr/include/netinet/ipprotosw.h
# 20081123: vfs_mountedon.9 removed
OLD_FILES+=usr/share/man/man9/vfs_mountedon.9.gz
# 20081023: FREE.9 and MALLOC.9 removed
OLD_FILES+=usr/share/man/man9/FREE.9.gz
OLD_FILES+=usr/share/man/man9/MALLOC.9.gz
# 20080928: removal of inaccurate device_ids(9) manual page
OLD_FILES+=usr/share/man/man9/device_ids.9.gz
OLD_FILES+=usr/share/man/man9/major.9.gz
OLD_FILES+=usr/share/man/man9/minor.9.gz
OLD_FILES+=usr/share/man/man9/umajor.9.gz
OLD_FILES+=usr/share/man/man9/uminor.9.gz
# 20080917: removal of manpage for axed kernel primitive suser(9)
OLD_FILES+=usr/share/man/man9/suser.9.gz
OLD_FILES+=usr/share/man/man9/suser_cred.9.gz
# 20080913: pax removed from rescue
OLD_FILES+=rescue/pax
# 20080823: removal of pt_chown, no longer needed to implement grantpt(3)
OLD_FILES+=usr/libexec/pt_chown
# 20080822: ntp 4.2.4p5 import
OLD_FILES+=usr/share/doc/ntp/driver23.html
OLD_FILES+=usr/share/doc/ntp/driver24.html
# 20080821: several man pages moved from man4.i386 to man4
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/share/man/man4/i386/acpi_aiboost.4.gz
OLD_FILES+=usr/share/man/man4/i386/acpi_asus.4.gz
OLD_FILES+=usr/share/man/man4/i386/acpi_fujitsu.4.gz
OLD_FILES+=usr/share/man/man4/i386/acpi_ibm.4.gz
OLD_FILES+=usr/share/man/man4/i386/acpi_panasonic.4.gz
OLD_FILES+=usr/share/man/man4/i386/acpi_sony.4.gz
OLD_FILES+=usr/share/man/man4/i386/acpi_toshiba.4.gz
OLD_FILES+=usr/share/man/man4/i386/ichwd.4.gz
OLD_FILES+=usr/share/man/man4/i386/if_ndis.4.gz
OLD_FILES+=usr/share/man/man4/i386/io.4.gz
OLD_FILES+=usr/share/man/man4/i386/linux.4.gz
OLD_FILES+=usr/share/man/man4/i386/ndis.4.gz
.endif
# 20080820: MPSAFE TTY layer integrated
OLD_FILES+=usr/include/sys/linedisc.h
OLD_FILES+=usr/share/man/man3/posix_openpt.3.gz
# 20080725: sgtty.h removed
OLD_FILES+=usr/include/sgtty.h
# 20080706: bsdlabel(8) removed on powerpc
.if ${TARGET_ARCH} == "powerpc"
OLD_FILES+=sbin/bsdlabel
OLD_FILES+=usr/share/man/man8/bsdlabel.8.gz
.endif
# 20080704: sbsh(4) removed
OLD_FILES+=usr/share/man/man4/if_sbsh.4.gz
OLD_FILES+=usr/share/man/man4/sbsh.4.gz
# 20080704: cnw(4) removed
OLD_FILES+=usr/share/man/man4/if_cnw.4.gz
OLD_FILES+=usr/share/man/man4/cnw.4.gz
# 20080704: oltr(4) removed
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/share/man/man4/i386/if_oltr.4.gz
OLD_FILES+=usr/share/man/man4/i386/oltr.4.gz
.endif
# 20080704: arl(4) removed
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/sbin/arlcontrol
OLD_FILES+=usr/share/man/man4/i386/arl.4.gz
OLD_FILES+=usr/share/man/man8/arlcontrol.8.gz
.endif
# 20080703: sunlabel only for sparc64
.if ${TARGET_ARCH} != "sparc64"
OLD_FILES+=sbin/sunlabel
OLD_FILES+=usr/share/man/man8/sunlabel.8.gz
.endif
# 20080703: bsdlabel & fdisk removed on ia64
.if ${TARGET_ARCH} == "ia64"
OLD_FILES+=sbin/bsdlabel
OLD_FILES+=usr/share/man/man8/bsdlabel.8.gz
OLD_FILES+=usr/share/man/man8/disklabel.8.gz
OLD_FILES+=sbin/fdisk
OLD_FILES+=usr/share/man/man8/fdisk.8.gz
.endif
# 20080701: wpa_supplicant.conf moved to share/examples/etc/
OLD_FILES+=usr/share/examples/wpa_supplicant/wpa_supplicant.conf
OLD_DIRS+=usr/share/examples/wpa_supplicant
# 20080614: pecoff image activator removed
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/include/machine/pecoff_machdep.h
.endif
# 20080614: sgtty removed
OLD_FILES+=usr/include/sys/ttychars.h
OLD_FILES+=usr/include/sys/ttydev.h
OLD_FILES+=usr/share/man/man3/gtty.3.gz
OLD_FILES+=usr/share/man/man3/stty.3.gz
# 20080609: gpt(8) removed
OLD_FILES+=sbin/gpt
OLD_FILES+=usr/share/man/man8/gpt.8.gz
# 20080525: I4B removed
OLD_FILES+=etc/isdn/answer
OLD_FILES+=etc/isdn/isdntel
OLD_FILES+=etc/isdn/record
OLD_FILES+=etc/isdn/tell
OLD_FILES+=etc/isdn/tell-record
OLD_FILES+=etc/isdn/unknown_incoming
OLD_FILES+=etc/isdn/holidays.D
OLD_FILES+=etc/isdn/isdnd.rates.A
OLD_FILES+=etc/isdn/isdnd.rates.D
OLD_FILES+=etc/isdn/isdnd.rates.F
OLD_FILES+=etc/isdn/isdnd.rates.L
OLD_FILES+=etc/isdn/isdnd.rates.UK.BT
OLD_FILES+=etc/isdn/isdnd.rc.sample
OLD_FILES+=etc/isdn/isdntel.alias.sample
OLD_DIRS+=etc/isdn
OLD_FILES+=etc/rc.d/isdnd
OLD_FILES+=usr/include/i4b/i4b_cause.h
OLD_FILES+=usr/include/i4b/i4b_debug.h
OLD_FILES+=usr/include/i4b/i4b_ioctl.h
OLD_FILES+=usr/include/i4b/i4b_rbch_ioctl.h
OLD_FILES+=usr/include/i4b/i4b_tel_ioctl.h
OLD_FILES+=usr/include/i4b/i4b_trace.h
OLD_DIRS+=usr/include/i4b
OLD_FILES+=usr/sbin/dtmfdecode
OLD_FILES+=usr/sbin/g711conv
OLD_FILES+=usr/sbin/isdnd
OLD_FILES+=usr/sbin/isdndebug
OLD_FILES+=usr/sbin/isdndecode
OLD_FILES+=usr/sbin/isdnmonitor
OLD_FILES+=usr/sbin/isdnphone
OLD_FILES+=usr/sbin/isdntel
OLD_FILES+=usr/sbin/isdntelctl
OLD_FILES+=usr/sbin/isdntrace
OLD_FILES+=usr/share/isdn/0.al
OLD_FILES+=usr/share/isdn/1.al
OLD_FILES+=usr/share/isdn/2.al
OLD_FILES+=usr/share/isdn/3.al
OLD_FILES+=usr/share/isdn/4.al
OLD_FILES+=usr/share/isdn/5.al
OLD_FILES+=usr/share/isdn/6.al
OLD_FILES+=usr/share/isdn/7.al
OLD_FILES+=usr/share/isdn/8.al
OLD_FILES+=usr/share/isdn/9.al
OLD_FILES+=usr/share/isdn/beep.al
OLD_FILES+=usr/share/isdn/msg.al
OLD_DIRS+=usr/share/isdn
OLD_FILES+=usr/share/man/man1/dtmfdecode.1.gz
OLD_FILES+=usr/share/man/man1/g711conv.1.gz
OLD_FILES+=usr/share/man/man4/i4b.4.gz
OLD_FILES+=usr/share/man/man4/i4bcapi.4.gz
OLD_FILES+=usr/share/man/man4/i4bctl.4.gz
OLD_FILES+=usr/share/man/man4/i4bing.4.gz
OLD_FILES+=usr/share/man/man4/i4bipr.4.gz
OLD_FILES+=usr/share/man/man4/i4bisppp.4.gz
OLD_FILES+=usr/share/man/man4/i4bq921.4.gz
OLD_FILES+=usr/share/man/man4/i4bq931.4.gz
OLD_FILES+=usr/share/man/man4/i4brbch.4.gz
OLD_FILES+=usr/share/man/man4/i4btel.4.gz
OLD_FILES+=usr/share/man/man4/i4btrc.4.gz
OLD_FILES+=usr/share/man/man4/iavc.4.gz
OLD_FILES+=usr/share/man/man4/isic.4.gz
OLD_FILES+=usr/share/man/man4/ifpi.4.gz
OLD_FILES+=usr/share/man/man4/ifpi2.4.gz
OLD_FILES+=usr/share/man/man4/ifpnp.4.gz
OLD_FILES+=usr/share/man/man4/ihfc.4.gz
OLD_FILES+=usr/share/man/man4/itjc.4.gz
OLD_FILES+=usr/share/man/man4/iwic.4.gz
OLD_FILES+=usr/share/man/man5/isdnd.rc.5.gz
OLD_FILES+=usr/share/man/man5/isdnd.rates.5.gz
OLD_FILES+=usr/share/man/man5/isdnd.acct.5.gz
OLD_FILES+=usr/share/man/man8/isdnd.8.gz
OLD_FILES+=usr/share/man/man8/isdndebug.8.gz
OLD_FILES+=usr/share/man/man8/isdndecode.8.gz
OLD_FILES+=usr/share/man/man8/isdnmonitor.8.gz
OLD_FILES+=usr/share/man/man8/isdnphone.8.gz
OLD_FILES+=usr/share/man/man8/isdntel.8.gz
OLD_FILES+=usr/share/man/man8/isdntelctl.8.gz
OLD_FILES+=usr/share/man/man8/isdntrace.8.gz
OLD_FILES+=usr/share/examples/isdn/contrib/README
OLD_FILES+=usr/share/examples/isdn/contrib/anleitung.ppp
OLD_FILES+=usr/share/examples/isdn/contrib/answer.c
OLD_FILES+=usr/share/examples/isdn/contrib/answer.sh
OLD_FILES+=usr/share/examples/isdn/contrib/convert.sh
OLD_FILES+=usr/share/examples/isdn/contrib/hplay.c
OLD_FILES+=usr/share/examples/isdn/contrib/i4b-ppp-newbie.txt
OLD_FILES+=usr/share/examples/isdn/contrib/isdnctl
OLD_FILES+=usr/share/examples/isdn/contrib/isdnd_acct
OLD_FILES+=usr/share/examples/isdn/contrib/isdnd_acct.pl
OLD_FILES+=usr/share/examples/isdn/contrib/isdntelmux.c
OLD_FILES+=usr/share/examples/isdn/contrib/mrtg-isp0.sh
OLD_FILES+=usr/share/examples/isdn/i4brunppp/Makefile
OLD_FILES+=usr/share/examples/isdn/i4brunppp/README
OLD_FILES+=usr/share/examples/isdn/i4brunppp/i4brunppp-isdnd.rc
OLD_FILES+=usr/share/examples/isdn/i4brunppp/i4brunppp.8
OLD_FILES+=usr/share/examples/isdn/i4brunppp/i4brunppp.c
OLD_FILES+=usr/share/examples/isdn/v21/Makefile
OLD_FILES+=usr/share/examples/isdn/v21/README
OLD_FILES+=usr/share/examples/isdn/v21/v21modem.c
OLD_FILES+=usr/share/examples/isdn/FAQ
OLD_FILES+=usr/share/examples/isdn/KERNEL
OLD_FILES+=usr/share/examples/isdn/Overview
OLD_FILES+=usr/share/examples/isdn/README
OLD_FILES+=usr/share/examples/isdn/ROADMAP
OLD_FILES+=usr/share/examples/isdn/ReleaseNotes
OLD_FILES+=usr/share/examples/isdn/Resources
OLD_FILES+=usr/share/examples/isdn/SupportedCards
OLD_FILES+=usr/share/examples/isdn/ThankYou
OLD_DIRS+=usr/share/examples/isdn/contrib
OLD_DIRS+=usr/share/examples/isdn/i4brunppp
OLD_DIRS+=usr/share/examples/isdn/v21
OLD_DIRS+=usr/share/examples/isdn
OLD_FILES+=usr/share/examples/ppp/isdnd.rc
OLD_FILES+=usr/share/examples/ppp/ppp.conf.isdn
# 20080525: ng_atmpif removed
OLD_FILES+=usr/include/netgraph/atm/ng_atmpif.h
OLD_FILES+=usr/share/man/man4/ng_atmpif.4.gz
# 20080522: pmap_addr_hint removed
OLD_FILES+=usr/share/man/man9/pmap_addr_hint.9.gz
# 20080517: ipsec_osdep.h removed
OLD_FILES+=usr/include/netipsec/ipsec_osdep.h
# 20080507: heimdal 1.1 import
OLD_LIBS+=usr/lib/libasn1.so.9
OLD_LIBS+=usr/lib/libgssapi.so.9
OLD_LIBS+=usr/lib/libgssapi_krb5.so.9
OLD_LIBS+=usr/lib/libhdb.so.9
OLD_LIBS+=usr/lib/libkadm5clnt.so.9
OLD_LIBS+=usr/lib/libkadm5srv.so.9
OLD_LIBS+=usr/lib/libkafs5.so.9
OLD_LIBS+=usr/lib/libkrb5.so.9
OLD_LIBS+=usr/lib/libroken.so.9
OLD_LIBS+=usr/lib32/libgssapi.so.9
# 20080420: Symbol card support dropped
OLD_FILES+=usr/include/dev/wi/spectrum24t_cf.h
# 20080420: awi removal
OLD_FILES+=usr/share/man/man4/awi.4.gz
OLD_FILES+=usr/share/man/man4/if_awi.4.gz
# 20080331: pkg_sign and pkg_check have been removed
OLD_FILES+=usr/sbin/pkg_check
OLD_FILES+=usr/sbin/pkg_sign
OLD_FILES+=usr/share/man/man1/pkg_check.1.gz
OLD_FILES+=usr/share/man/man1/pkg_sign.1.gz
# 20080325: tzdata2008b import
OLD_FILES+=usr/share/zoneinfo/Asia/Calcutta
OLD_FILES+=usr/share/zoneinfo/Asia/Saigon
# 20080314: stack_print(9) mlink fixed
OLD_FILES+=usr/share/man/man9/stack_printf.9.gz
# 20080312: libkse removal
OLD_FILES+=usr/include/sys/kse.h
OLD_FILES+=usr/lib/libkse.so
OLD_LIBS+=usr/lib/libkse.so.3
OLD_FILES+=usr/share/man/man2/kse.2.gz
OLD_FILES+=usr/share/man/man2/kse_create.2.gz
OLD_FILES+=usr/share/man/man2/kse_exit.2.gz
OLD_FILES+=usr/share/man/man2/kse_release.2.gz
OLD_FILES+=usr/share/man/man2/kse_switchin.2.gz
OLD_FILES+=usr/share/man/man2/kse_thr_interrupt.2.gz
OLD_FILES+=usr/share/man/man2/kse_wakeup.2.gz
OLD_FILES+=usr/lib32/libkse.so
OLD_LIBS+=usr/lib32/libkse.so.3
# 20080225: bsdar/bsdranlib renamed to ar/ranlib
OLD_FILES+=usr/bin/bsdar
OLD_FILES+=usr/bin/bsdranlib
OLD_FILES+=usr/share/man/man1/bsdar.1.gz
OLD_FILES+=usr/share/man/man1/bsdranlib.1.gz
# 20080220: geom_lvm renamed to geom_linux_lvm
OLD_FILES+=usr/share/man/man4/geom_lvm.4.gz
# 20080126: oldcard.4 removal
OLD_FILES+=usr/share/man/man4/card.4.gz
OLD_FILES+=usr/share/man/man4/oldcard.4.gz
# 20080122: BUF_REFCNT(9) removed from the tree
OLD_FILES+=usr/share/man/man9/BUF_REFCNT.9.gz
# 20080108: shm_open.3 and shm_unlink.3 moved to section 2
OLD_FILES+=usr/share/man/man3/shm_open.3.gz
OLD_FILES+=usr/share/man/man3/shm_unlink.3.gz
# 20071207: fortunes2-o merged with fortunes-o.real
OLD_FILES+=usr/share/games/fortune/fortunes2-o
OLD_FILES+=usr/share/games/fortune/fortunes2-o.dat
# 20071201: Removal of XRPU driver
OLD_FILES+=usr/include/sys/xrpuio.h
# 20071129: Disabled static versions of libkse by default
OLD_FILES+=usr/lib/libkse.a
OLD_FILES+=usr/lib/libkse_p.a
OLD_FILES+=usr/lib/libkse_pic.a
OLD_FILES+=usr/lib32/libkse.a
OLD_FILES+=usr/lib32/libkse_p.a
OLD_FILES+=usr/lib32/libkse_pic.a
# 20071129: Removed a Solaris compatibility header
OLD_FILES+=usr/include/sys/_elf_solaris.h
# 20071125: pmc_x86_get_msr() renamed to pmc_get_msr()
OLD_FILES+=usr/share/man/man3/pmc_x86_get_msr.3.gz
# 20071108: Removed very crufty OLDCARD support file
OLD_FILES+=etc/defaults/pccard.conf
# 20071104: Removed bsdlabel, fdisk and gpt from rescue on ia64.
.if ${TARGET_ARCH} == "ia64"
OLD_FILES+=rescue/bsdlabel
OLD_FILES+=rescue/fdisk
OLD_FILES+=rescue/gpt
.endif
# 20071025: rc.d/nfslocking superseded by rc.d/lockd and rc.d/statd
OLD_FILES+=etc/rc.d/nfslocking
# 20070930: rename of cached to nscd
OLD_FILES+=etc/cached.conf
OLD_FILES+=etc/rc.d/cached
OLD_FILES+=usr/sbin/cached
OLD_FILES+=usr/share/man/man5/cached.conf.5.gz
OLD_FILES+=usr/share/man/man8/cached.8.gz
# 20070807: removal of PowerPC-specific header file.
.if ${TARGET_ARCH} == "powerpc"
OLD_FILES+=usr/include/machine/interruptvar.h
.endif
# 20070801: fast_ipsec.4 gone
OLD_FILES+=usr/share/man/man4/fast_ipsec.4.gz
# 20070715: netatm temporarily disconnected (removed 20080525)
OLD_FILES+=rescue/atm
OLD_FILES+=rescue/fore_dnld
OLD_FILES+=rescue/ilmid
OLD_FILES+=sbin/atm
OLD_FILES+=sbin/fore_dnld
OLD_FILES+=sbin/ilmid
OLD_FILES+=usr/include/libatm.h
OLD_FILES+=usr/include/netatm/atm.h
OLD_FILES+=usr/include/netatm/atm_cm.h
OLD_FILES+=usr/include/netatm/atm_if.h
OLD_FILES+=usr/include/netatm/atm_ioctl.h
OLD_FILES+=usr/include/netatm/atm_pcb.h
OLD_FILES+=usr/include/netatm/atm_sap.h
OLD_FILES+=usr/include/netatm/atm_sigmgr.h
OLD_FILES+=usr/include/netatm/atm_stack.h
OLD_FILES+=usr/include/netatm/atm_sys.h
OLD_FILES+=usr/include/netatm/atm_var.h
OLD_FILES+=usr/include/netatm/atm_vc.h
OLD_FILES+=usr/include/netatm/ipatm/ipatm.h
OLD_FILES+=usr/include/netatm/ipatm/ipatm_serv.h
OLD_FILES+=usr/include/netatm/ipatm/ipatm_var.h
OLD_FILES+=usr/include/netatm/port.h
OLD_FILES+=usr/include/netatm/queue.h
OLD_FILES+=usr/include/netatm/sigpvc/sigpvc_var.h
OLD_FILES+=usr/include/netatm/spans/spans_cls.h
OLD_FILES+=usr/include/netatm/spans/spans_kxdr.h
OLD_FILES+=usr/include/netatm/spans/spans_var.h
OLD_FILES+=usr/include/netatm/uni/sscf_uni.h
OLD_FILES+=usr/include/netatm/uni/sscf_uni_var.h
OLD_FILES+=usr/include/netatm/uni/sscop.h
OLD_FILES+=usr/include/netatm/uni/sscop_misc.h
OLD_FILES+=usr/include/netatm/uni/sscop_pdu.h
OLD_FILES+=usr/include/netatm/uni/sscop_var.h
OLD_FILES+=usr/include/netatm/uni/uni.h
OLD_FILES+=usr/include/netatm/uni/uniip_var.h
OLD_FILES+=usr/include/netatm/uni/unisig.h
OLD_FILES+=usr/include/netatm/uni/unisig_decode.h
OLD_FILES+=usr/include/netatm/uni/unisig_mbuf.h
OLD_FILES+=usr/include/netatm/uni/unisig_msg.h
OLD_FILES+=usr/include/netatm/uni/unisig_print.h
OLD_FILES+=usr/include/netatm/uni/unisig_var.h
OLD_FILES+=usr/lib/libatm.a
OLD_FILES+=usr/lib/libatm_p.a
OLD_FILES+=usr/sbin/atmarpd
OLD_FILES+=usr/sbin/scspd
OLD_FILES+=usr/share/man/en.ISO8859-1/man8/atm.8.gz
OLD_FILES+=usr/share/man/en.ISO8859-1/man8/atmarpd.8.gz
OLD_FILES+=usr/share/man/en.ISO8859-1/man8/fore_dnld.8.gz
OLD_FILES+=usr/share/man/en.ISO8859-1/man8/ilmid.8.gz
OLD_FILES+=usr/share/man/en.ISO8859-1/man8/scspd.8.gz
OLD_FILES+=usr/share/man/man8/atm.8.gz
OLD_FILES+=usr/share/man/man8/atmarpd.8.gz
OLD_FILES+=usr/share/man/man8/fore_dnld.8.gz
OLD_FILES+=usr/share/man/man8/ilmid.8.gz
OLD_FILES+=usr/share/man/man8/scspd.8.gz
OLD_FILES+=usr/share/examples/atm/NOTES
OLD_FILES+=usr/share/examples/atm/README
OLD_FILES+=usr/share/examples/atm/Startup
OLD_FILES+=usr/share/examples/atm/atm-config.sh
OLD_FILES+=usr/share/examples/atm/atm-sockets.txt
OLD_FILES+=usr/share/examples/atm/cpcs-design.txt
OLD_FILES+=usr/share/examples/atm/fore-microcode.txt
OLD_FILES+=usr/share/examples/atm/sscf-design.txt
OLD_FILES+=usr/share/examples/atm/sscop-design.txt
OLD_LIBS+=lib/libatm.so.5
OLD_LIBS+=usr/lib/libatm.so
OLD_DIRS+=usr/include/netatm/sigpvc
OLD_DIRS+=usr/include/netatm/spans
OLD_DIRS+=usr/include/netatm/ipatm
OLD_DIRS+=usr/include/netatm/uni
OLD_DIRS+=usr/include/netatm
OLD_DIRS+=usr/share/examples/atm
OLD_FILES+=usr/lib32/libatm.a
OLD_FILES+=usr/lib32/libatm.so
OLD_LIBS+=usr/lib32/libatm.so.5
OLD_FILES+=usr/lib32/libatm_p.a
# 20070705: I4B headers repo-copied to include/i4b/
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/include/machine/i4b_cause.h
OLD_FILES+=usr/include/machine/i4b_debug.h
OLD_FILES+=usr/include/machine/i4b_ioctl.h
OLD_FILES+=usr/include/machine/i4b_rbch_ioctl.h
OLD_FILES+=usr/include/machine/i4b_tel_ioctl.h
OLD_FILES+=usr/include/machine/i4b_trace.h
.endif
# 20070703: pf 4.1 import
OLD_FILES+=usr/libexec/ftp-proxy
# 20070701: KAME IPSec removal
OLD_FILES+=usr/include/netinet6/ah.h
OLD_FILES+=usr/include/netinet6/ah6.h
OLD_FILES+=usr/include/netinet6/ah_aesxcbcmac.h
OLD_FILES+=usr/include/netinet6/esp.h
OLD_FILES+=usr/include/netinet6/esp6.h
OLD_FILES+=usr/include/netinet6/esp_aesctr.h
OLD_FILES+=usr/include/netinet6/esp_camellia.h
OLD_FILES+=usr/include/netinet6/esp_rijndael.h
OLD_FILES+=usr/include/netinet6/ipsec.h
OLD_FILES+=usr/include/netinet6/ipsec6.h
OLD_FILES+=usr/include/netinet6/ipcomp.h
OLD_FILES+=usr/include/netinet6/ipcomp6.h
OLD_FILES+=usr/include/netkey/key.h
OLD_FILES+=usr/include/netkey/key_debug.h
OLD_FILES+=usr/include/netkey/key_var.h
OLD_FILES+=usr/include/netkey/keydb.h
OLD_FILES+=usr/include/netkey/keysock.h
OLD_DIRS+=usr/include/netkey
# 20070701: remove wicontrol
OLD_FILES+=usr/sbin/wicontrol
OLD_FILES+=usr/share/man/man8/wicontrol.8.gz
# 20070625: umapfs removal
OLD_FILES+=rescue/mount_umapfs
OLD_FILES+=sbin/mount_umapfs
OLD_FILES+=usr/include/fs/umapfs/umap.h
OLD_FILES+=usr/share/man/man8/mount_umapfs.8.gz
OLD_DIRS+=usr/include/fs/umapfs
# 20070618: Removal of the PROTO.localhost* files
OLD_FILES+=etc/namedb/PROTO.localhost-v6.rev
OLD_FILES+=etc/namedb/PROTO.localhost.rev
OLD_FILES+=etc/namedb/make-localhost
# 20070618: shared library version bump
OLD_LIBS+=lib/libalias.so.5
OLD_LIBS+=lib/libbsnmp.so.3
OLD_LIBS+=lib/libncurses.so.6
OLD_LIBS+=lib/libncursesw.so.6
OLD_LIBS+=lib/libreadline.so.6
OLD_LIBS+=usr/lib/libdialog.so.5
OLD_LIBS+=usr/lib/libgnuregex.so.3
OLD_LIBS+=usr/lib/libhistory.so.6
OLD_LIBS+=usr/lib/libpam.so.3
OLD_LIBS+=usr/lib/libssh.so.3
OLD_LIBS+=usr/lib/pam_chroot.so.3
OLD_LIBS+=usr/lib/pam_deny.so.3
OLD_LIBS+=usr/lib/pam_echo.so.3
OLD_LIBS+=usr/lib/pam_exec.so.3
OLD_LIBS+=usr/lib/pam_ftpusers.so.3
OLD_LIBS+=usr/lib/pam_group.so.3
OLD_LIBS+=usr/lib/pam_guest.so.3
OLD_LIBS+=usr/lib/pam_krb5.so.3
OLD_LIBS+=usr/lib/pam_ksu.so.3
OLD_LIBS+=usr/lib/pam_lastlog.so.3
OLD_LIBS+=usr/lib/pam_login_access.so.3
OLD_LIBS+=usr/lib/pam_nologin.so.3
OLD_LIBS+=usr/lib/pam_opie.so.3
OLD_LIBS+=usr/lib/pam_opieaccess.so.3
OLD_LIBS+=usr/lib/pam_passwdqc.so.3
OLD_LIBS+=usr/lib/pam_permit.so.3
OLD_LIBS+=usr/lib/pam_radius.so.3
OLD_LIBS+=usr/lib/pam_rhosts.so.3
OLD_LIBS+=usr/lib/pam_rootok.so.3
OLD_LIBS+=usr/lib/pam_securetty.so.3
OLD_LIBS+=usr/lib/pam_self.so.3
OLD_LIBS+=usr/lib/pam_ssh.so.3
OLD_LIBS+=usr/lib/pam_tacplus.so.3
OLD_LIBS+=usr/lib/pam_unix.so.3
OLD_LIBS+=usr/lib/snmp_atm.so.4
OLD_LIBS+=usr/lib/snmp_bridge.so.4
OLD_LIBS+=usr/lib/snmp_hostres.so.4
OLD_LIBS+=usr/lib/snmp_mibII.so.4
OLD_LIBS+=usr/lib/snmp_netgraph.so.4
OLD_LIBS+=usr/lib/snmp_pf.so.4
OLD_LIBS+=usr/lib32/libalias.so.5
OLD_LIBS+=usr/lib32/libbsnmp.so.3
OLD_LIBS+=usr/lib32/libdialog.so.5
OLD_LIBS+=usr/lib32/libgnuregex.so.3
OLD_LIBS+=usr/lib32/libhistory.so.6
OLD_LIBS+=usr/lib32/libncurses.so.6
OLD_LIBS+=usr/lib32/libncursesw.so.6
OLD_LIBS+=usr/lib32/libpam.so.3
OLD_LIBS+=usr/lib32/libreadline.so.6
OLD_LIBS+=usr/lib32/libssh.so.3
OLD_LIBS+=usr/lib32/pam_chroot.so.3
OLD_LIBS+=usr/lib32/pam_deny.so.3
OLD_LIBS+=usr/lib32/pam_echo.so.3
OLD_LIBS+=usr/lib32/pam_exec.so.3
OLD_LIBS+=usr/lib32/pam_ftpusers.so.3
OLD_LIBS+=usr/lib32/pam_group.so.3
OLD_LIBS+=usr/lib32/pam_guest.so.3
OLD_LIBS+=usr/lib32/pam_krb5.so.3
OLD_LIBS+=usr/lib32/pam_ksu.so.3
OLD_LIBS+=usr/lib32/pam_lastlog.so.3
OLD_LIBS+=usr/lib32/pam_login_access.so.3
OLD_LIBS+=usr/lib32/pam_nologin.so.3
OLD_LIBS+=usr/lib32/pam_opie.so.3
OLD_LIBS+=usr/lib32/pam_opieaccess.so.3
OLD_LIBS+=usr/lib32/pam_passwdqc.so.3
OLD_LIBS+=usr/lib32/pam_permit.so.3
OLD_LIBS+=usr/lib32/pam_radius.so.3
OLD_LIBS+=usr/lib32/pam_rhosts.so.3
OLD_LIBS+=usr/lib32/pam_rootok.so.3
OLD_LIBS+=usr/lib32/pam_securetty.so.3
OLD_LIBS+=usr/lib32/pam_self.so.3
OLD_LIBS+=usr/lib32/pam_ssh.so.3
OLD_LIBS+=usr/lib32/pam_tacplus.so.3
OLD_LIBS+=usr/lib32/pam_unix.so.3
# 20070613: IPX over IP tunnel removal
OLD_FILES+=usr/include/netipx/ipx_ip.h
# 20070605: sched_core removal
OLD_FILES+=usr/share/man/man4/sched_core.4.gz
# 20070603: BIND 9.4.1 import
OLD_LIBS+=usr/lib/liblwres.so.10
# 20070521: shared library version bump
OLD_LIBS+=lib/libatm.so.4
OLD_LIBS+=lib/libbegemot.so.2
OLD_LIBS+=lib/libbsdxml.so.2
OLD_LIBS+=lib/libcam.so.3
OLD_LIBS+=lib/libcrypt.so.3
OLD_LIBS+=lib/libdevstat.so.5
OLD_LIBS+=lib/libedit.so.5
OLD_LIBS+=lib/libgeom.so.3
OLD_LIBS+=lib/libipsec.so.2
OLD_LIBS+=lib/libipx.so.3
OLD_LIBS+=lib/libkiconv.so.2
OLD_LIBS+=lib/libkse.so.2
OLD_LIBS+=lib/libkvm.so.3
OLD_LIBS+=lib/libm.so.4
OLD_LIBS+=lib/libmd.so.3
OLD_LIBS+=lib/libpcap.so.4
OLD_LIBS+=lib/libpthread.so.2
OLD_LIBS+=lib/libsbuf.so.3
OLD_LIBS+=lib/libthr.so.2
OLD_LIBS+=lib/libufs.so.3
OLD_LIBS+=lib/libutil.so.6
OLD_LIBS+=lib/libz.so.3
OLD_LIBS+=usr/lib/libbluetooth.so.2
OLD_LIBS+=usr/lib/libbsm.so.1
OLD_LIBS+=usr/lib/libbz2.so.2
OLD_LIBS+=usr/lib/libcalendar.so.3
OLD_LIBS+=usr/lib/libcom_err.so.3
OLD_LIBS+=usr/lib/libdevinfo.so.3
OLD_LIBS+=usr/lib/libfetch.so.4
OLD_LIBS+=usr/lib/libform.so.3
OLD_LIBS+=usr/lib/libformw.so.3
OLD_LIBS+=usr/lib/libftpio.so.6
OLD_LIBS+=usr/lib/libgpib.so.1
OLD_LIBS+=usr/lib/libkse.so.2
OLD_LIBS+=usr/lib/libmagic.so.2
OLD_LIBS+=usr/lib/libmemstat.so.1
OLD_LIBS+=usr/lib/libmenu.so.3
OLD_LIBS+=usr/lib/libmenuw.so.3
OLD_LIBS+=usr/lib/libmilter.so.3
OLD_LIBS+=usr/lib/libmp.so.5
OLD_LIBS+=usr/lib/libncp.so.2
OLD_LIBS+=usr/lib/libnetgraph.so.2
OLD_LIBS+=usr/lib/libngatm.so.2
OLD_LIBS+=usr/lib/libopie.so.4
OLD_LIBS+=usr/lib/libpanel.so.3
OLD_LIBS+=usr/lib/libpanelw.so.3
OLD_LIBS+=usr/lib/libpmc.so.3
OLD_LIBS+=usr/lib/libradius.so.2
OLD_LIBS+=usr/lib/librpcsvc.so.3
OLD_LIBS+=usr/lib/libsdp.so.2
OLD_LIBS+=usr/lib/libsmb.so.2
OLD_LIBS+=usr/lib/libstdc++.so.5
OLD_LIBS+=usr/lib/libtacplus.so.2
OLD_LIBS+=usr/lib/libthr.so.2
OLD_LIBS+=usr/lib/libthread_db.so.2
OLD_LIBS+=usr/lib/libugidfw.so.2
OLD_LIBS+=usr/lib/libusbhid.so.2
OLD_LIBS+=usr/lib/libvgl.so.4
OLD_LIBS+=usr/lib/libwrap.so.4
OLD_LIBS+=usr/lib/libypclnt.so.2
OLD_LIBS+=usr/lib/snmp_bridge.so.3
OLD_LIBS+=usr/lib/snmp_hostres.so.3
OLD_LIBS+=usr/lib32/libatm.so.4
OLD_LIBS+=usr/lib32/libbegemot.so.2
OLD_LIBS+=usr/lib32/libbluetooth.so.2
OLD_LIBS+=usr/lib32/libbsdxml.so.2
OLD_LIBS+=usr/lib32/libbsm.so.1
OLD_LIBS+=usr/lib32/libbz2.so.2
OLD_LIBS+=usr/lib32/libcalendar.so.3
OLD_LIBS+=usr/lib32/libcam.so.3
OLD_LIBS+=usr/lib32/libcom_err.so.3
OLD_LIBS+=usr/lib32/libcrypt.so.3
OLD_LIBS+=usr/lib32/libdevinfo.so.3
OLD_LIBS+=usr/lib32/libdevstat.so.5
OLD_LIBS+=usr/lib32/libedit.so.5
OLD_LIBS+=usr/lib32/libfetch.so.4
OLD_LIBS+=usr/lib32/libform.so.3
OLD_LIBS+=usr/lib32/libformw.so.3
OLD_LIBS+=usr/lib32/libftpio.so.6
OLD_LIBS+=usr/lib32/libgeom.so.3
OLD_LIBS+=usr/lib32/libgpib.so.1
OLD_LIBS+=usr/lib32/libipsec.so.2
OLD_LIBS+=usr/lib32/libipx.so.3
OLD_LIBS+=usr/lib32/libkiconv.so.2
OLD_LIBS+=usr/lib32/libkse.so.2
OLD_LIBS+=usr/lib32/libkvm.so.3
OLD_LIBS+=usr/lib32/libm.so.4
OLD_LIBS+=usr/lib32/libmagic.so.2
OLD_LIBS+=usr/lib32/libmd.so.3
OLD_LIBS+=usr/lib32/libmemstat.so.1
OLD_LIBS+=usr/lib32/libmenu.so.3
OLD_LIBS+=usr/lib32/libmenuw.so.3
OLD_LIBS+=usr/lib32/libmilter.so.3
OLD_LIBS+=usr/lib32/libmp.so.5
OLD_LIBS+=usr/lib32/libncp.so.2
OLD_LIBS+=usr/lib32/libnetgraph.so.2
OLD_LIBS+=usr/lib32/libngatm.so.2
OLD_LIBS+=usr/lib32/libopie.so.4
OLD_LIBS+=usr/lib32/libpanel.so.3
OLD_LIBS+=usr/lib32/libpanelw.so.3
OLD_LIBS+=usr/lib32/libpcap.so.4
OLD_LIBS+=usr/lib32/libpmc.so.3
OLD_LIBS+=usr/lib32/libpthread.so.2
OLD_LIBS+=usr/lib32/libradius.so.2
OLD_LIBS+=usr/lib32/librpcsvc.so.3
OLD_LIBS+=usr/lib32/libsbuf.so.3
OLD_LIBS+=usr/lib32/libsdp.so.2
OLD_LIBS+=usr/lib32/libsmb.so.2
OLD_LIBS+=usr/lib32/libstdc++.so.5
OLD_LIBS+=usr/lib32/libtacplus.so.2
OLD_LIBS+=usr/lib32/libthr.so.2
OLD_LIBS+=usr/lib32/libthread_db.so.2
OLD_LIBS+=usr/lib32/libufs.so.3
OLD_LIBS+=usr/lib32/libugidfw.so.2
OLD_LIBS+=usr/lib32/libusbhid.so.2
OLD_LIBS+=usr/lib32/libutil.so.6
OLD_LIBS+=usr/lib32/libvgl.so.4
OLD_LIBS+=usr/lib32/libwrap.so.4
OLD_LIBS+=usr/lib32/libypclnt.so.2
OLD_LIBS+=usr/lib32/libz.so.3
# 20070519: GCC 4.2 import
OLD_FILES+=usr/bin/f77
OLD_FILES+=usr/bin/protoize
OLD_FILES+=usr/include/g2c.h
OLD_FILES+=usr/libexec/f771
OLD_FILES+=usr/share/info/g77.info.gz
OLD_FILES+=usr/share/man/man1/f77.1.gz
OLD_FILES+=usr/include/c++/3.4/algorithm
OLD_FILES+=usr/include/c++/3.4/backward/algo.h
OLD_FILES+=usr/include/c++/3.4/backward/algobase.h
OLD_FILES+=usr/include/c++/3.4/backward/alloc.h
OLD_FILES+=usr/include/c++/3.4/backward/backward_warning.h
OLD_FILES+=usr/include/c++/3.4/backward/bvector.h
OLD_FILES+=usr/include/c++/3.4/backward/complex.h
OLD_FILES+=usr/include/c++/3.4/backward/defalloc.h
OLD_FILES+=usr/include/c++/3.4/backward/deque.h
OLD_FILES+=usr/include/c++/3.4/backward/fstream.h
OLD_FILES+=usr/include/c++/3.4/backward/function.h
OLD_FILES+=usr/include/c++/3.4/backward/hash_map.h
OLD_FILES+=usr/include/c++/3.4/backward/hash_set.h
OLD_FILES+=usr/include/c++/3.4/backward/hashtable.h
OLD_FILES+=usr/include/c++/3.4/backward/heap.h
OLD_FILES+=usr/include/c++/3.4/backward/iomanip.h
OLD_FILES+=usr/include/c++/3.4/backward/iostream.h
OLD_FILES+=usr/include/c++/3.4/backward/istream.h
OLD_FILES+=usr/include/c++/3.4/backward/iterator.h
OLD_FILES+=usr/include/c++/3.4/backward/list.h
OLD_FILES+=usr/include/c++/3.4/backward/map.h
OLD_FILES+=usr/include/c++/3.4/backward/multimap.h
OLD_FILES+=usr/include/c++/3.4/backward/multiset.h
OLD_FILES+=usr/include/c++/3.4/backward/new.h
OLD_FILES+=usr/include/c++/3.4/backward/ostream.h
OLD_FILES+=usr/include/c++/3.4/backward/pair.h
OLD_FILES+=usr/include/c++/3.4/backward/queue.h
OLD_FILES+=usr/include/c++/3.4/backward/rope.h
OLD_FILES+=usr/include/c++/3.4/backward/set.h
OLD_FILES+=usr/include/c++/3.4/backward/slist.h
OLD_FILES+=usr/include/c++/3.4/backward/stack.h
OLD_FILES+=usr/include/c++/3.4/backward/stream.h
OLD_FILES+=usr/include/c++/3.4/backward/streambuf.h
OLD_FILES+=usr/include/c++/3.4/backward/strstream
OLD_FILES+=usr/include/c++/3.4/backward/tempbuf.h
OLD_FILES+=usr/include/c++/3.4/backward/tree.h
OLD_FILES+=usr/include/c++/3.4/backward/vector.h
OLD_FILES+=usr/include/c++/3.4/bits/allocator.h
OLD_FILES+=usr/include/c++/3.4/bits/atomic_word.h
OLD_FILES+=usr/include/c++/3.4/bits/atomicity.h
OLD_FILES+=usr/include/c++/3.4/bits/basic_file.h
OLD_FILES+=usr/include/c++/3.4/bits/basic_ios.h
OLD_FILES+=usr/include/c++/3.4/bits/basic_ios.tcc
OLD_FILES+=usr/include/c++/3.4/bits/basic_string.h
OLD_FILES+=usr/include/c++/3.4/bits/basic_string.tcc
OLD_FILES+=usr/include/c++/3.4/bits/boost_concept_check.h
OLD_FILES+=usr/include/c++/3.4/bits/c++allocator.h
OLD_FILES+=usr/include/c++/3.4/bits/c++config.h
OLD_FILES+=usr/include/c++/3.4/bits/c++io.h
OLD_FILES+=usr/include/c++/3.4/bits/c++locale.h
OLD_FILES+=usr/include/c++/3.4/bits/c++locale_internal.h
OLD_FILES+=usr/include/c++/3.4/bits/char_traits.h
OLD_FILES+=usr/include/c++/3.4/bits/cmath.tcc
OLD_FILES+=usr/include/c++/3.4/bits/codecvt.h
OLD_FILES+=usr/include/c++/3.4/bits/codecvt_specializations.h
OLD_FILES+=usr/include/c++/3.4/bits/concept_check.h
OLD_FILES+=usr/include/c++/3.4/bits/concurrence.h
OLD_FILES+=usr/include/c++/3.4/bits/cpp_type_traits.h
OLD_FILES+=usr/include/c++/3.4/bits/ctype_base.h
OLD_FILES+=usr/include/c++/3.4/bits/ctype_inline.h
OLD_FILES+=usr/include/c++/3.4/bits/ctype_noninline.h
OLD_FILES+=usr/include/c++/3.4/bits/deque.tcc
OLD_FILES+=usr/include/c++/3.4/bits/fstream.tcc
OLD_FILES+=usr/include/c++/3.4/bits/functexcept.h
OLD_FILES+=usr/include/c++/3.4/bits/gslice.h
OLD_FILES+=usr/include/c++/3.4/bits/gslice_array.h
OLD_FILES+=usr/include/c++/3.4/bits/gthr-default.h
OLD_FILES+=usr/include/c++/3.4/bits/gthr-posix.h
OLD_FILES+=usr/include/c++/3.4/bits/gthr-single.h
OLD_FILES+=usr/include/c++/3.4/bits/gthr.h
OLD_FILES+=usr/include/c++/3.4/bits/indirect_array.h
OLD_FILES+=usr/include/c++/3.4/bits/ios_base.h
OLD_FILES+=usr/include/c++/3.4/bits/istream.tcc
OLD_FILES+=usr/include/c++/3.4/bits/list.tcc
OLD_FILES+=usr/include/c++/3.4/bits/locale_classes.h
OLD_FILES+=usr/include/c++/3.4/bits/locale_facets.h
OLD_FILES+=usr/include/c++/3.4/bits/locale_facets.tcc
OLD_FILES+=usr/include/c++/3.4/bits/localefwd.h
OLD_FILES+=usr/include/c++/3.4/bits/mask_array.h
OLD_FILES+=usr/include/c++/3.4/bits/messages_members.h
OLD_FILES+=usr/include/c++/3.4/bits/os_defines.h
OLD_FILES+=usr/include/c++/3.4/bits/ostream.tcc
OLD_FILES+=usr/include/c++/3.4/bits/postypes.h
OLD_FILES+=usr/include/c++/3.4/bits/slice_array.h
OLD_FILES+=usr/include/c++/3.4/bits/sstream.tcc
OLD_FILES+=usr/include/c++/3.4/bits/stl_algo.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_algobase.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_bvector.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_construct.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_deque.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_function.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_heap.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_iterator.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_iterator_base_funcs.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_iterator_base_types.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_list.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_map.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_multimap.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_multiset.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_numeric.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_pair.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_queue.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_raw_storage_iter.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_relops.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_set.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_stack.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_tempbuf.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_threads.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_tree.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_uninitialized.h
OLD_FILES+=usr/include/c++/3.4/bits/stl_vector.h
OLD_FILES+=usr/include/c++/3.4/bits/stream_iterator.h
OLD_FILES+=usr/include/c++/3.4/bits/streambuf.tcc
OLD_FILES+=usr/include/c++/3.4/bits/streambuf_iterator.h
OLD_FILES+=usr/include/c++/3.4/bits/stringfwd.h
OLD_FILES+=usr/include/c++/3.4/bits/time_members.h
OLD_FILES+=usr/include/c++/3.4/bits/type_traits.h
OLD_FILES+=usr/include/c++/3.4/bits/valarray_after.h
OLD_FILES+=usr/include/c++/3.4/bits/valarray_array.h
OLD_FILES+=usr/include/c++/3.4/bits/valarray_array.tcc
OLD_FILES+=usr/include/c++/3.4/bits/valarray_before.h
OLD_FILES+=usr/include/c++/3.4/bits/vector.tcc
OLD_FILES+=usr/include/c++/3.4/bitset
OLD_FILES+=usr/include/c++/3.4/cassert
OLD_FILES+=usr/include/c++/3.4/cctype
OLD_FILES+=usr/include/c++/3.4/cerrno
OLD_FILES+=usr/include/c++/3.4/cfloat
OLD_FILES+=usr/include/c++/3.4/ciso646
OLD_FILES+=usr/include/c++/3.4/climits
OLD_FILES+=usr/include/c++/3.4/clocale
OLD_FILES+=usr/include/c++/3.4/cmath
OLD_FILES+=usr/include/c++/3.4/complex
OLD_FILES+=usr/include/c++/3.4/csetjmp
OLD_FILES+=usr/include/c++/3.4/csignal
OLD_FILES+=usr/include/c++/3.4/cstdarg
OLD_FILES+=usr/include/c++/3.4/cstddef
OLD_FILES+=usr/include/c++/3.4/cstdio
OLD_FILES+=usr/include/c++/3.4/cstdlib
OLD_FILES+=usr/include/c++/3.4/cstring
OLD_FILES+=usr/include/c++/3.4/ctime
OLD_FILES+=usr/include/c++/3.4/cwchar
OLD_FILES+=usr/include/c++/3.4/cwctype
OLD_FILES+=usr/include/c++/3.4/cxxabi.h
OLD_FILES+=usr/include/c++/3.4/debug/bitset
OLD_FILES+=usr/include/c++/3.4/debug/debug.h
OLD_FILES+=usr/include/c++/3.4/debug/deque
OLD_FILES+=usr/include/c++/3.4/debug/formatter.h
OLD_FILES+=usr/include/c++/3.4/debug/hash_map
OLD_FILES+=usr/include/c++/3.4/debug/hash_map.h
OLD_FILES+=usr/include/c++/3.4/debug/hash_multimap.h
OLD_FILES+=usr/include/c++/3.4/debug/hash_multiset.h
OLD_FILES+=usr/include/c++/3.4/debug/hash_set
OLD_FILES+=usr/include/c++/3.4/debug/hash_set.h
OLD_FILES+=usr/include/c++/3.4/debug/list
OLD_FILES+=usr/include/c++/3.4/debug/map
OLD_FILES+=usr/include/c++/3.4/debug/map.h
OLD_FILES+=usr/include/c++/3.4/debug/multimap.h
OLD_FILES+=usr/include/c++/3.4/debug/multiset.h
OLD_FILES+=usr/include/c++/3.4/debug/safe_base.h
OLD_FILES+=usr/include/c++/3.4/debug/safe_iterator.h
OLD_FILES+=usr/include/c++/3.4/debug/safe_iterator.tcc
OLD_FILES+=usr/include/c++/3.4/debug/safe_sequence.h
OLD_FILES+=usr/include/c++/3.4/debug/set
OLD_FILES+=usr/include/c++/3.4/debug/set.h
OLD_FILES+=usr/include/c++/3.4/debug/string
OLD_FILES+=usr/include/c++/3.4/debug/vector
OLD_FILES+=usr/include/c++/3.4/deque
OLD_FILES+=usr/include/c++/3.4/exception
OLD_FILES+=usr/include/c++/3.4/exception_defines.h
OLD_FILES+=usr/include/c++/3.4/ext/algorithm
OLD_FILES+=usr/include/c++/3.4/ext/bitmap_allocator.h
OLD_FILES+=usr/include/c++/3.4/ext/debug_allocator.h
OLD_FILES+=usr/include/c++/3.4/ext/enc_filebuf.h
OLD_FILES+=usr/include/c++/3.4/ext/functional
OLD_FILES+=usr/include/c++/3.4/ext/hash_fun.h
OLD_FILES+=usr/include/c++/3.4/ext/hash_map
OLD_FILES+=usr/include/c++/3.4/ext/hash_set
OLD_FILES+=usr/include/c++/3.4/ext/hashtable.h
OLD_FILES+=usr/include/c++/3.4/ext/iterator
OLD_FILES+=usr/include/c++/3.4/ext/malloc_allocator.h
OLD_FILES+=usr/include/c++/3.4/ext/memory
OLD_FILES+=usr/include/c++/3.4/ext/mt_allocator.h
OLD_FILES+=usr/include/c++/3.4/ext/new_allocator.h
OLD_FILES+=usr/include/c++/3.4/ext/numeric
OLD_FILES+=usr/include/c++/3.4/ext/pod_char_traits.h
OLD_FILES+=usr/include/c++/3.4/ext/pool_allocator.h
OLD_FILES+=usr/include/c++/3.4/ext/rb_tree
OLD_FILES+=usr/include/c++/3.4/ext/rope
OLD_FILES+=usr/include/c++/3.4/ext/ropeimpl.h
OLD_FILES+=usr/include/c++/3.4/ext/slist
OLD_FILES+=usr/include/c++/3.4/ext/stdio_filebuf.h
OLD_FILES+=usr/include/c++/3.4/ext/stdio_sync_filebuf.h
OLD_FILES+=usr/include/c++/3.4/fstream
OLD_FILES+=usr/include/c++/3.4/functional
OLD_FILES+=usr/include/c++/3.4/iomanip
OLD_FILES+=usr/include/c++/3.4/ios
OLD_FILES+=usr/include/c++/3.4/iosfwd
OLD_FILES+=usr/include/c++/3.4/iostream
OLD_FILES+=usr/include/c++/3.4/istream
OLD_FILES+=usr/include/c++/3.4/iterator
OLD_FILES+=usr/include/c++/3.4/limits
OLD_FILES+=usr/include/c++/3.4/list
OLD_FILES+=usr/include/c++/3.4/locale
OLD_FILES+=usr/include/c++/3.4/map
OLD_FILES+=usr/include/c++/3.4/memory
OLD_FILES+=usr/include/c++/3.4/new
OLD_FILES+=usr/include/c++/3.4/numeric
OLD_FILES+=usr/include/c++/3.4/ostream
OLD_FILES+=usr/include/c++/3.4/queue
OLD_FILES+=usr/include/c++/3.4/set
OLD_FILES+=usr/include/c++/3.4/sstream
OLD_FILES+=usr/include/c++/3.4/stack
OLD_FILES+=usr/include/c++/3.4/stdexcept
OLD_FILES+=usr/include/c++/3.4/streambuf
OLD_FILES+=usr/include/c++/3.4/string
OLD_FILES+=usr/include/c++/3.4/typeinfo
OLD_FILES+=usr/include/c++/3.4/utility
OLD_FILES+=usr/include/c++/3.4/valarray
OLD_FILES+=usr/include/c++/3.4/vector
OLD_DIRS+=usr/include/c++/3.4/backward
OLD_DIRS+=usr/include/c++/3.4/bits
OLD_DIRS+=usr/include/c++/3.4/debug
OLD_DIRS+=usr/include/c++/3.4/ext
OLD_DIRS+=usr/include/c++/3.4
# 20070510: zpool/zfs moved to /sbin
OLD_FILES+=usr/sbin/zfs
OLD_FILES+=usr/sbin/zpool
# 20070423: rc.bluetooth (examples) removed
OLD_FILES+=usr/share/examples/netgraph/bluetooth/rc.bluetooth
OLD_DIRS+=usr/share/examples/netgraph/bluetooth
# 20070421: worm.4 removed
OLD_FILES+=usr/share/man/man4/worm.4.gz
# 20070417: trunk(4) renamed to lagg(4)
OLD_FILES+=usr/include/net/if_trunk.h
# 20070409: uuidgen moved to /bin/
OLD_FILES+=usr/bin/uuidgen
# 20070328: bzip2 1.0.4 import
OLD_FILES+=usr/share/info/bzip2.info.gz
# 20070303: libarchive 2.0
OLD_LIBS+=usr/lib/libarchive.so.3
OLD_LIBS+=usr/lib32/libarchive.so.3
# 20070301: remove addr2ascii and ascii2addr
OLD_FILES+=usr/share/man/man3/addr2ascii.3.gz
OLD_FILES+=usr/share/man/man3/ascii2addr.3.gz
# 20070225: vm_page_unmanage() removed
OLD_FILES+=usr/share/man/man9/vm_page_unmanage.9.gz
# 20070216: VFS_VPTOFH(9) -> VOP_VPTOFH(9)
OLD_FILES+=usr/share/man/man9/VFS_VPTOFH.9.gz
# 20070212: kame.4 removed
OLD_FILES+=usr/share/man/man4/kame.4.gz
# 20070201: remove libmytinfo link
OLD_FILES+=usr/lib/libmytinfo.a
OLD_FILES+=usr/lib/libmytinfo.so
OLD_FILES+=usr/lib/libmytinfo_p.a
OLD_FILES+=usr/lib/libmytinfow.a
OLD_FILES+=usr/lib/libmytinfow.so
OLD_FILES+=usr/lib/libmytinfow_p.a
OLD_FILES+=usr/lib32/libmytinfo.a
OLD_FILES+=usr/lib32/libmytinfo.so
OLD_FILES+=usr/lib32/libmytinfo_p.a
OLD_FILES+=usr/lib32/libmytinfow.a
OLD_FILES+=usr/lib32/libmytinfow.so
OLD_FILES+=usr/lib32/libmytinfow_p.a
# 20070128: remove vnconfig
OLD_FILES+=usr/sbin/vnconfig
# 20070127: remove bpf_compat.h
OLD_FILES+=usr/include/net/bpf_compat.h
# 20070125: objformat bites the dust
OLD_FILES+=usr/bin/objformat
OLD_FILES+=usr/share/man/man1/objformat.1.gz
OLD_FILES+=usr/include/objformat.h
OLD_FILES+=usr/share/man/man3/getobjformat.3.gz
# 20061201: remove symlinks to *.so.4 libalias modules
OLD_FILES+=usr/lib/libalias_cuseeme.so
OLD_FILES+=usr/lib/libalias_dummy.so
OLD_FILES+=usr/lib/libalias_ftp.so
OLD_FILES+=usr/lib/libalias_irc.so
OLD_FILES+=usr/lib/libalias_nbt.so
OLD_FILES+=usr/lib/libalias_pptp.so
OLD_FILES+=usr/lib/libalias_skinny.so
OLD_FILES+=usr/lib/libalias_smedia.so
# 20061201: remove old *.so.4 libalias modules
OLD_FILES+=lib/libalias_cuseeme.so.4
OLD_FILES+=lib/libalias_dummy.so.4
OLD_FILES+=lib/libalias_ftp.so.4
OLD_FILES+=lib/libalias_irc.so.4
OLD_FILES+=lib/libalias_nbt.so.4
OLD_FILES+=lib/libalias_pptp.so.4
OLD_FILES+=lib/libalias_skinny.so.4
OLD_FILES+=lib/libalias_smedia.so.4
# 20061126: remove old man page
OLD_FILES+=usr/share/man/man3/archive_read_set_bytes_per_block.3.gz
# 20061125: remove old man page
OLD_FILES+=usr/share/man/man9/devsw.9.gz
# 20061122: remove obsolete mount programs
OLD_FILES+=sbin/mount_devfs
OLD_FILES+=sbin/mount_ext2fs
OLD_FILES+=sbin/mount_fdescfs
OLD_FILES+=sbin/mount_linprocfs
OLD_FILES+=sbin/mount_procfs
OLD_FILES+=sbin/mount_std
OLD_FILES+=rescue/mount_devfs
OLD_FILES+=rescue/mount_ext2fs
OLD_FILES+=rescue/mount_fdescfs
OLD_FILES+=rescue/mount_linprocfs
OLD_FILES+=rescue/mount_procfs
OLD_FILES+=rescue/mount_std
OLD_FILES+=usr/share/man/man8/mount_devfs.8.gz
OLD_FILES+=usr/share/man/man8/mount_ext2fs.8.gz
OLD_FILES+=usr/share/man/man8/mount_fdescfs.8.gz
OLD_FILES+=usr/share/man/man8/mount_linprocfs.8.gz
OLD_FILES+=usr/share/man/man8/mount_procfs.8.gz
OLD_FILES+=usr/share/man/man8/mount_std.8.gz
# 20061116: uhidev.4 removed
OLD_FILES+=usr/share/man/man4/uhidev.4.gz
# 20061106: archive_write_prepare.3 removed
OLD_FILES+=usr/share/man/man3/archive_write_prepare.3.gz
.if ${TARGET_ARCH} == "ia64"
# 20061104: skiload.help removed
OLD_FILES+=boot/skiload.help
.endif
# 20061018: pccardc removed
OLD_FILES+=usr/sbin/pccardc
OLD_FILES+=usr/share/man/man8/pccardc.8.gz
# 20060930: demangle.h from contrib/libstdc++/include/ext/ removed
OLD_FILES+=usr/include/c++/3.4/ext/demangle.h
# 20060929: mrouted removed
OLD_FILES+=usr/sbin/map-mbone
OLD_FILES+=usr/sbin/mrinfo
OLD_FILES+=usr/sbin/mrouted
OLD_FILES+=usr/sbin/mtrace
OLD_FILES+=usr/share/man/man8/map-mbone.8.gz
OLD_FILES+=usr/share/man/man8/mrinfo.8.gz
OLD_FILES+=usr/share/man/man8/mrouted.8.gz
OLD_FILES+=usr/share/man/man8/mtrace.8.gz
# 20060924: tcpslice removed
OLD_FILES+=usr/sbin/tcpslice
OLD_FILES+=usr/share/man/man1/tcpslice.1.gz
# 20060829: kvmdb cleanup script removed
OLD_FILES+=etc/periodic/weekly/120.clean-kvmdb
# 20060822: ramdisk{,-own} have been replaced by mdconfig{,2}
OLD_FILES+=etc/rc.d/ramdisk
OLD_FILES+=etc/rc.d/ramdisk-own
# 20060729: OpenSSL 0.9.7e -> 0.9.8b upgrade
OLD_FILES+=usr/include/openssl/eng_int.h
OLD_FILES+=usr/include/openssl/hw_4758_cca_err.h
OLD_FILES+=usr/include/openssl/hw_aep_err.h
OLD_FILES+=usr/include/openssl/hw_atalla_err.h
OLD_FILES+=usr/include/openssl/hw_cswift_err.h
OLD_FILES+=usr/include/openssl/hw_ncipher_err.h
OLD_FILES+=usr/include/openssl/hw_nuron_err.h
OLD_FILES+=usr/include/openssl/hw_sureware_err.h
OLD_FILES+=usr/include/openssl/hw_ubsec_err.h
# 20060713: mount_linsysfs(8) never existed in 7.x
OLD_FILES+=sbin/mount_linsysfs
OLD_FILES+=usr/share/man/man8/mount_linsysfs.8.gz
# 20060704: KAME compat file net_osdep.h removed
OLD_FILES+=usr/include/net/net_osdep.h
# 20060605: man page links removed by OpenBSM 1.0 alpha 6 import
OLD_FILES+=usr/share/man/man3/au_to_socket.3.gz
OLD_FILES+=usr/share/man/man3/au_to_socket_ex_128.3.gz
OLD_FILES+=usr/share/man/man3/au_to_socket_ex_32.3.gz
# 20060517: pcvt removed
OLD_FILES+=usr/share/pcvt/README.FIRST
OLD_FILES+=usr/share/pcvt/Etc/xmodmap-german
OLD_FILES+=usr/share/pcvt/Etc/pcvt.sh
OLD_FILES+=usr/share/pcvt/Etc/pcvt.el
OLD_FILES+=usr/share/pcvt/Etc/Terminfo
OLD_FILES+=usr/share/pcvt/Etc/Termcap
OLD_DIRS+=usr/share/pcvt/Etc
OLD_FILES+=usr/share/pcvt/Doc/NotesAndHints
OLD_FILES+=usr/share/pcvt/Doc/Keyboard.VT
OLD_FILES+=usr/share/pcvt/Doc/Keyboard.HP
OLD_FILES+=usr/share/pcvt/Doc/EscapeSequences
OLD_FILES+=usr/share/pcvt/Doc/Charsets
OLD_FILES+=usr/share/pcvt/Doc/CharGen
OLD_FILES+=usr/share/pcvt/Doc/Bibliography
OLD_FILES+=usr/share/pcvt/Doc/Acknowledgements
OLD_DIRS+=usr/share/pcvt/Doc
OLD_DIRS+=usr/share/pcvt
OLD_FILES+=usr/share/misc/pcvtfonts/vt220l.816
OLD_FILES+=usr/share/misc/pcvtfonts/vt220l.814
OLD_FILES+=usr/share/misc/pcvtfonts/vt220l.810
OLD_FILES+=usr/share/misc/pcvtfonts/vt220l.808
OLD_FILES+=usr/share/misc/pcvtfonts/vt220h.816
OLD_FILES+=usr/share/misc/pcvtfonts/vt220h.814
OLD_FILES+=usr/share/misc/pcvtfonts/vt220h.810
OLD_FILES+=usr/share/misc/pcvtfonts/vt220h.808
OLD_DIRS+=usr/share/misc/pcvtfonts
OLD_FILES+=usr/share/misc/keycap.pcvt
OLD_FILES+=usr/share/man/man8/ispcvt.8.gz
OLD_FILES+=usr/share/man/man5/keycap.5.gz
OLD_FILES+=usr/share/man/man4/vt.4.gz
OLD_FILES+=usr/share/man/man4/pcvt.4.gz
OLD_FILES+=usr/share/man/man3/kgetstr.3.gz
OLD_FILES+=usr/share/man/man3/kgetnum.3.gz
OLD_FILES+=usr/share/man/man3/kgetflag.3.gz
OLD_FILES+=usr/share/man/man3/kgetent.3.gz
OLD_FILES+=usr/share/man/man3/keycap.3.gz
OLD_FILES+=usr/share/man/man1/vt220keys.1.gz
OLD_FILES+=usr/share/man/man1/scon.1.gz
OLD_FILES+=usr/share/man/man1/loadfont.1.gz
OLD_FILES+=usr/share/man/man1/kcon.1.gz
OLD_FILES+=usr/share/man/man1/fontedit.1.gz
OLD_FILES+=usr/share/man/man1/cursor.1.gz
OLD_FILES+=usr/sbin/vt220keys
OLD_FILES+=usr/sbin/scon
OLD_FILES+=usr/sbin/loadfont
OLD_FILES+=usr/sbin/kcon
OLD_FILES+=usr/sbin/ispcvt
OLD_FILES+=usr/sbin/fontedit
OLD_FILES+=usr/sbin/cursor
OLD_FILES+=usr/lib/libkeycap_p.a
OLD_FILES+=usr/lib/libkeycap.a
OLD_FILES+=usr/include/machine/pcvt_ioctl.h
# 20060514: lnc(4) replaced by le(4)
OLD_FILES+=usr/share/man/man4/i386/lnc.4.gz
# 20060512: remove ip6fw
OLD_FILES+=etc/periodic/security/600.ip6fwdenied
OLD_FILES+=etc/periodic/security/650.ip6fwlimit
OLD_FILES+=sbin/ip6fw
OLD_FILES+=usr/include/netinet6/ip6_fw.h
OLD_FILES+=usr/share/man/man8/ip6fw.8.gz
# 20060424: sab(4) removed
OLD_FILES+=usr/share/man/man4/sab.4.gz
# 20060328: remove redundant rc.d script
OLD_FILES+=etc/rc.d/ike
# 20060127: revert libdisk to static-only
OLD_FILES+=usr/lib/libdisk.so
# 20060115: sys/pccard includes cleanup
OLD_FILES+=usr/include/pccard/driver.h
OLD_FILES+=usr/include/pccard/i82365.h
OLD_FILES+=usr/include/pccard/meciareg.h
OLD_FILES+=usr/include/pccard/pccard_nbk.h
OLD_FILES+=usr/include/pccard/pcic_pci.h
OLD_FILES+=usr/include/pccard/pcicvar.h
OLD_FILES+=usr/include/pccard/slot.h
# 20051215: rescue/nextboot.sh renamed to rescue/nextboot
OLD_FILES+=rescue/nextboot.sh
# 20051214: usbd(8) removed
OLD_FILES+=etc/rc.d/usbd
OLD_FILES+=etc/usbd.conf
OLD_FILES+=usr/sbin/usbd
OLD_FILES+=usr/share/man/man8/usbd.8.gz
# 20051029: rc.d/ppp-user renamed to rc.d/ppp for convenience
OLD_FILES+=etc/rc.d/ppp-user
# 20051012: setkey(8) moved to /sbin/
OLD_FILES+=usr/sbin/setkey
# 20050930: pccardd(8) removed
OLD_FILES+=usr/sbin/pccardd
OLD_FILES+=usr/share/man/man5/pccard.conf.5.gz
OLD_FILES+=usr/share/man/man8/pccardd.8.gz
# 20050927: bridge(4) replaced by if_bridge(4)
OLD_FILES+=usr/include/net/bridge.h
# 20050831: not implemented
OLD_FILES+=usr/share/man/man3/getino.3.gz
OLD_FILES+=usr/share/man/man3/putino.3.gz
# 20050825: T/TCP retired several months ago
OLD_FILES+=usr/share/man/man4/ttcp.4.gz
# 20050805: tn3270 retired long ago
OLD_FILES+=usr/share/misc/map3270
# 20050801: too old to be interesting here
OLD_FILES+=usr/share/doc/papers/px.ps.gz
# 20050721: moved to ports
OLD_FILES+=usr/sbin/vttest
OLD_FILES+=usr/share/man/man1/vttest.1.gz
# 20050617: wpa man pages moved to section 8
OLD_FILES+=usr/share/man/man1/hostapd.1.gz
OLD_FILES+=usr/share/man/man1/hostapd_cli.1.gz
OLD_FILES+=usr/share/man/man1/wpa_cli.1.gz
OLD_FILES+=usr/share/man/man1/wpa_supplicant.1.gz
# 20050610: rexecd (insecure by design)
OLD_FILES+=etc/pam.d/rexecd
OLD_FILES+=usr/share/man/man8/rexecd.8.gz
OLD_FILES+=usr/libexec/rexecd
# 20050606: OpenBSD dhclient replaces ISC one
OLD_FILES+=bin/omshell
OLD_FILES+=sbin/omshell
OLD_FILES+=usr/share/man/man1/omshell.1.gz
OLD_FILES+=usr/share/man/man5/dhcp-eval.5.gz
# 200504XX: ipf tools moved from /usr to /
OLD_FILES+=rescue/ipfs
OLD_FILES+=rescue/ipfstat
OLD_FILES+=rescue/ipmon
OLD_FILES+=rescue/ipnat
OLD_FILES+=usr/sbin/ipftest
OLD_FILES+=usr/sbin/ipresend
OLD_FILES+=usr/sbin/ipsend
OLD_FILES+=usr/sbin/iptest
OLD_FILES+=usr/share/man/man1/ipnat.1.gz
OLD_FILES+=usr/share/man/man1/ipsend.1.gz
OLD_FILES+=usr/share/man/man1/iptest.1.gz
OLD_FILES+=usr/share/man/man5/ipsend.5.gz
# 200503XX: bsdtar takes over gtar
OLD_FILES+=usr/bin/gtar
OLD_FILES+=usr/share/man/man1/gtar.1.gz
# 200503XX
OLD_FILES+=usr/share/man/man3/exp10.3.gz
OLD_FILES+=usr/share/man/man3/exp10f.3.gz
OLD_FILES+=usr/share/man/man3/fpsetsticky.3.gz
# 20050324: updated release infrastructure
OLD_FILES+=usr/share/man/man5/drivers.conf.5.gz
# 20050317: removed from BIND 9 distribution
OLD_FILES+=usr/share/doc/bind9/KNOWN_DEFECTS
# 2005XXXX:
OLD_FILES+=sbin/mount_autofs
OLD_FILES+=usr/lib/libautofs.a
OLD_FILES+=usr/lib/libautofs.so
OLD_FILES+=usr/share/man/man8/mount_autofs.8.gz
# 20050203: Merged with fortunes
OLD_FILES+=usr/share/games/fortune/fortunes2
OLD_FILES+=usr/share/games/fortune/fortunes2.dat
# 200501XX:
OLD_FILES+=usr/libexec/getNAME
# 200411XX: gvinum replaces vinum
OLD_FILES+=bin/vinum
OLD_FILES+=rescue/vinum
OLD_FILES+=sbin/vinum
OLD_FILES+=usr/share/man/man8/vinum.8.gz
# 200411XX: libxpg4 removal
OLD_FILES+=usr/lib/libxpg4.a
OLD_FILES+=usr/lib/libxpg4.so
OLD_FILES+=usr/lib/libxpg4_p.a
# 20041109: replaced by em(4)
OLD_FILES+=usr/share/man/man4/gx.4.gz
OLD_FILES+=usr/share/man/man4/if_gx.4.gz
# 20041017: rune interface removed
OLD_FILES+=usr/include/rune.h
OLD_FILES+=usr/share/man/man3/fgetrune.3.gz
OLD_FILES+=usr/share/man/man3/fputrune.3.gz
OLD_FILES+=usr/share/man/man3/fungetrune.3.gz
OLD_FILES+=usr/share/man/man3/mbrrune.3.gz
OLD_FILES+=usr/share/man/man3/mbrune.3.gz
OLD_FILES+=usr/share/man/man3/rune.3.gz
OLD_FILES+=usr/share/man/man3/setinvalidrune.3.gz
OLD_FILES+=usr/share/man/man3/sgetrune.3.gz
OLD_FILES+=usr/share/man/man3/sputrune.3.gz
# 20040925: bind9 import
OLD_FILES+=usr/bin/dnskeygen
OLD_FILES+=usr/bin/dnsquery
OLD_FILES+=usr/lib/libisc.a
OLD_FILES+=usr/lib/libisc.so
OLD_FILES+=usr/lib/libisc_p.a
OLD_FILES+=usr/libexec/named-xfer
OLD_FILES+=usr/sbin/named.restart
OLD_FILES+=usr/sbin/ndc
OLD_FILES+=usr/sbin/nslookup
OLD_FILES+=usr/sbin/nsupdate
OLD_FILES+=usr/share/doc/bind/html/acl.html
OLD_FILES+=usr/share/doc/bind/html/address_list.html
OLD_FILES+=usr/share/doc/bind/html/comments.html
OLD_FILES+=usr/share/doc/bind/html/config.html
OLD_FILES+=usr/share/doc/bind/html/controls.html
OLD_FILES+=usr/share/doc/bind/html/docdef.html
OLD_FILES+=usr/share/doc/bind/html/example.html
OLD_FILES+=usr/share/doc/bind/html/include.html
OLD_FILES+=usr/share/doc/bind/html/index.html
OLD_FILES+=usr/share/doc/bind/html/key.html
OLD_FILES+=usr/share/doc/bind/html/logging.html
OLD_FILES+=usr/share/doc/bind/html/master.html
OLD_FILES+=usr/share/doc/bind/html/options.html
OLD_FILES+=usr/share/doc/bind/html/server.html
OLD_FILES+=usr/share/doc/bind/html/trusted-keys.html
OLD_FILES+=usr/share/doc/bind/html/zone.html
OLD_FILES+=usr/share/doc/bind/misc/DynamicUpdate
OLD_FILES+=usr/share/doc/bind/misc/FAQ.1of2
OLD_FILES+=usr/share/doc/bind/misc/FAQ.2of2
OLD_FILES+=usr/share/doc/bind/misc/rfc2317-notes.txt
OLD_FILES+=usr/share/doc/bind/misc/style.txt
OLD_FILES+=usr/share/man/man1/dnskeygen.1.gz
OLD_FILES+=usr/share/man/man1/dnsquery.1.gz
OLD_FILES+=usr/share/man/man8/named-bootconf.8.gz
OLD_FILES+=usr/share/man/man8/named-xfer.8.gz
OLD_FILES+=usr/share/man/man8/named.restart.8.gz
OLD_FILES+=usr/share/man/man8/ndc.8.gz
OLD_FILES+=usr/share/man/man8/nslookup.8.gz
# 200409XX
OLD_FILES+=usr/share/man/man3/ENSURE.3.gz
OLD_FILES+=usr/share/man/man3/ENSURE_ERR.3.gz
OLD_FILES+=usr/share/man/man3/INSIST.3.gz
OLD_FILES+=usr/share/man/man3/INSIST_ERR.3.gz
OLD_FILES+=usr/share/man/man3/INVARIANT.3.gz
OLD_FILES+=usr/share/man/man3/INVARIANT_ERR.3.gz
OLD_FILES+=usr/share/man/man3/REQUIRE.3.gz
OLD_FILES+=usr/share/man/man3/REQUIRE_ERR.3.gz
OLD_FILES+=usr/share/man/man3/assertion_type_to_text.3.gz
OLD_FILES+=usr/share/man/man3/assertions.3.gz
OLD_FILES+=usr/share/man/man3/bitncmp.3.gz
OLD_FILES+=usr/share/man/man3/evAddTime.3.gz
OLD_FILES+=usr/share/man/man3/evCancelConn.3.gz
OLD_FILES+=usr/share/man/man3/evCancelRW.3.gz
OLD_FILES+=usr/share/man/man3/evClearIdleTimer.3.gz
OLD_FILES+=usr/share/man/man3/evClearTimer.3.gz
OLD_FILES+=usr/share/man/man3/evCmpTime.3.gz
OLD_FILES+=usr/share/man/man3/evConnFunc.3.gz
OLD_FILES+=usr/share/man/man3/evConnect.3.gz
OLD_FILES+=usr/share/man/man3/evConsIovec.3.gz
OLD_FILES+=usr/share/man/man3/evConsTime.3.gz
OLD_FILES+=usr/share/man/man3/evCreate.3.gz
OLD_FILES+=usr/share/man/man3/evDefer.3.gz
OLD_FILES+=usr/share/man/man3/evDeselectFD.3.gz
OLD_FILES+=usr/share/man/man3/evDestroy.3.gz
OLD_FILES+=usr/share/man/man3/evDispatch.3.gz
OLD_FILES+=usr/share/man/man3/evDo.3.gz
OLD_FILES+=usr/share/man/man3/evDrop.3.gz
OLD_FILES+=usr/share/man/man3/evFileFunc.3.gz
OLD_FILES+=usr/share/man/man3/evGetNext.3.gz
OLD_FILES+=usr/share/man/man3/evHold.3.gz
OLD_FILES+=usr/share/man/man3/evInitID.3.gz
OLD_FILES+=usr/share/man/man3/evLastEventTime.3.gz
OLD_FILES+=usr/share/man/man3/evListen.3.gz
OLD_FILES+=usr/share/man/man3/evMainLoop.3.gz
OLD_FILES+=usr/share/man/man3/evNowTime.3.gz
OLD_FILES+=usr/share/man/man3/evPrintf.3.gz
OLD_FILES+=usr/share/man/man3/evRead.3.gz
OLD_FILES+=usr/share/man/man3/evResetTimer.3.gz
OLD_FILES+=usr/share/man/man3/evSelectFD.3.gz
OLD_FILES+=usr/share/man/man3/evSetDebug.3.gz
OLD_FILES+=usr/share/man/man3/evSetIdleTimer.3.gz
OLD_FILES+=usr/share/man/man3/evSetTimer.3.gz
OLD_FILES+=usr/share/man/man3/evStreamFunc.3.gz
OLD_FILES+=usr/share/man/man3/evSubTime.3.gz
OLD_FILES+=usr/share/man/man3/evTestID.3.gz
OLD_FILES+=usr/share/man/man3/evTimeRW.3.gz
OLD_FILES+=usr/share/man/man3/evTimeSpec.3.gz
OLD_FILES+=usr/share/man/man3/evTimeVal.3.gz
OLD_FILES+=usr/share/man/man3/evTimerFunc.3.gz
OLD_FILES+=usr/share/man/man3/evTouchIdleTimer.3.gz
OLD_FILES+=usr/share/man/man3/evTryAccept.3.gz
OLD_FILES+=usr/share/man/man3/evUnhold.3.gz
OLD_FILES+=usr/share/man/man3/evUntimeRW.3.gz
OLD_FILES+=usr/share/man/man3/evUnwait.3.gz
OLD_FILES+=usr/share/man/man3/evWaitFor.3.gz
OLD_FILES+=usr/share/man/man3/evWaitFunc.3.gz
OLD_FILES+=usr/share/man/man3/evWrite.3.gz
OLD_FILES+=usr/share/man/man3/eventlib.3.gz
OLD_FILES+=usr/share/man/man3/heap.3.gz
OLD_FILES+=usr/share/man/man3/heap_decreased.3.gz
OLD_FILES+=usr/share/man/man3/heap_delete.3.gz
OLD_FILES+=usr/share/man/man3/heap_element.3.gz
OLD_FILES+=usr/share/man/man3/heap_for_each.3.gz
OLD_FILES+=usr/share/man/man3/heap_free.3.gz
OLD_FILES+=usr/share/man/man3/heap_increased.3.gz
OLD_FILES+=usr/share/man/man3/heap_insert.3.gz
OLD_FILES+=usr/share/man/man3/heap_new.3.gz
OLD_FILES+=usr/share/man/man3/log_add_channel.3.gz
OLD_FILES+=usr/share/man/man3/log_category_is_active.3.gz
OLD_FILES+=usr/share/man/man3/log_close_stream.3.gz
OLD_FILES+=usr/share/man/man3/log_dec_references.3.gz
OLD_FILES+=usr/share/man/man3/log_free_channel.3.gz
OLD_FILES+=usr/share/man/man3/log_free_context.3.gz
OLD_FILES+=usr/share/man/man3/log_get_filename.3.gz
OLD_FILES+=usr/share/man/man3/log_get_stream.3.gz
OLD_FILES+=usr/share/man/man3/log_inc_references.3.gz
OLD_FILES+=usr/share/man/man3/log_new_context.3.gz
OLD_FILES+=usr/share/man/man3/log_new_file_channel.3.gz
OLD_FILES+=usr/share/man/man3/log_new_null_channel.3.gz
OLD_FILES+=usr/share/man/man3/log_new_syslog_channel.3.gz
OLD_FILES+=usr/share/man/man3/log_open_stream.3.gz
OLD_FILES+=usr/share/man/man3/log_option.3.gz
OLD_FILES+=usr/share/man/man3/log_remove_channel.3.gz
OLD_FILES+=usr/share/man/man3/log_set_file_owner.3.gz
OLD_FILES+=usr/share/man/man3/log_vwrite.3.gz
OLD_FILES+=usr/share/man/man3/log_write.3.gz
OLD_FILES+=usr/share/man/man3/logging.3.gz
OLD_FILES+=usr/share/man/man3/memcluster.3.gz
OLD_FILES+=usr/share/man/man3/memget.3.gz
OLD_FILES+=usr/share/man/man3/memput.3.gz
OLD_FILES+=usr/share/man/man3/memstats.3.gz
OLD_FILES+=usr/share/man/man3/set_assertion_failure_callback.3.gz
OLD_FILES+=usr/share/man/man3/sigwait.3.gz
OLD_FILES+=usr/share/man/man3/tree_add.3.gz
OLD_FILES+=usr/share/man/man3/tree_delete.3.gz
OLD_FILES+=usr/share/man/man3/tree_init.3.gz
OLD_FILES+=usr/share/man/man3/tree_mung.3.gz
OLD_FILES+=usr/share/man/man3/tree_srch.3.gz
OLD_FILES+=usr/share/man/man3/tree_trav.3.gz
# 2004XXYY: OS internal libs, no ports use them, no need to use OLD_LIBS
OLD_FILES+=lib/geom/geom_concat.so.1
OLD_FILES+=lib/geom/geom_label.so.1
OLD_FILES+=lib/geom/geom_nop.so.1
OLD_FILES+=lib/geom/geom_stripe.so.1
# 20040713: fla(4) removed.
OLD_FILES+=usr/share/man/man4/fla.4.gz
# 200407XX
OLD_FILES+=usr/sbin/kernbb
OLD_FILES+=usr/sbin/ntp-genkeys
OLD_FILES+=usr/sbin/ntptimeset
OLD_FILES+=usr/share/man/man8/kernbb.8.gz
OLD_FILES+=usr/share/man/man8/ntp-genkeys.8.gz
# 20040627: usbdevs.h and usbdevs_data.h removal
OLD_FILES+=usr/include/dev/usb/usbdevs.h
OLD_FILES+=usr/include/dev/usb/usbdevs_data.h
# 200406XX
OLD_FILES+=usr/bin/gasp
OLD_FILES+=usr/bin/gdbreplay
OLD_FILES+=usr/share/man/man1/gasp.1.gz
OLD_FILES+=sbin/mountd
OLD_FILES+=sbin/mount_fdesc
OLD_FILES+=sbin/mount_umap
OLD_FILES+=sbin/mount_union
OLD_FILES+=sbin/mount_msdos
OLD_FILES+=sbin/mount_null
OLD_FILES+=sbin/mount_kernfs
# 200405XX: arl
OLD_FILES+=usr/sbin/arlconfig
OLD_FILES+=usr/share/man/man8/arlconfig.8.gz
# 200403XX
OLD_FILES+=bin/raidctl
OLD_FILES+=sbin/raidctl
OLD_FILES+=usr/bin/sasc
OLD_FILES+=usr/sbin/sgsc
OLD_FILES+=usr/sbin/stlload
OLD_FILES+=usr/sbin/stlstats
OLD_FILES+=usr/share/man/man1/sasc.1.gz
OLD_FILES+=usr/share/man/man1/sgsc.1.gz
OLD_FILES+=usr/share/man/man4/i386/stl.4.gz
OLD_FILES+=usr/share/man/man8/raidctl.8.gz
# 20040229: clean_environment() was removed after 3 days
OLD_FILES+=usr/share/man/man3/clean_environment.3.gz
# 20040119: installed as `isdntel' in newer systems
OLD_FILES+=etc/isdn/isdntel.sh
# 200XYYZZ: /lib transition glitches
OLD_FILES+=lib/libalias.so
OLD_FILES+=lib/libatm.so
OLD_FILES+=lib/libbsdxml.so
OLD_FILES+=lib/libc.so
OLD_FILES+=lib/libcam.so
OLD_FILES+=lib/libcrypt.so
OLD_FILES+=lib/libcrypto.so
OLD_FILES+=lib/libdevstat.so
OLD_FILES+=lib/libedit.so
OLD_FILES+=lib/libgeom.so
OLD_FILES+=lib/libipsec.so
OLD_FILES+=lib/libipx.so
OLD_FILES+=lib/libkvm.so
OLD_FILES+=lib/libm.so
OLD_FILES+=lib/libmd.so
OLD_FILES+=lib/libncurses.so
OLD_FILES+=lib/libreadline.so
OLD_FILES+=lib/libsbuf.so
OLD_FILES+=lib/libufs.so
OLD_FILES+=lib/libz.so
# 200312XX
OLD_FILES+=bin/cxconfig
OLD_FILES+=sbin/cxconfig
OLD_FILES+=usr/share/man/man8/cxconfig.8.gz
# 20031016: MULTI_DRIVER_MODULE macro removed
OLD_FILES+=usr/share/man/man9/MULTI_DRIVER_MODULE.9.gz
# 200309XX
OLD_FILES+=usr/bin/symorder
OLD_FILES+=usr/share/man/man1/symorder.1.gz
# 200308XX
OLD_FILES+=usr/sbin/amldb
OLD_FILES+=usr/share/man/man8/amldb.8.gz
# 200307XX
OLD_FILES+=sbin/mount_nwfs
OLD_FILES+=sbin/mount_portalfs
OLD_FILES+=sbin/mount_smbfs
# 200306XX
OLD_FILES+=usr/sbin/dev_mkdb
OLD_FILES+=usr/share/man/man8/dev_mkdb.8.gz
# 200304XX
OLD_FILES+=usr/lib/libcipher.a
OLD_FILES+=usr/lib/libcipher.so
OLD_FILES+=usr/lib/libcipher_p.a
OLD_FILES+=usr/lib/libgmp.a
OLD_FILES+=usr/lib/libgmp.so
OLD_FILES+=usr/lib/libgmp_p.a
OLD_FILES+=usr/lib/libperl.a
OLD_FILES+=usr/lib/libperl.so
OLD_FILES+=usr/lib/libperl_p.a
OLD_FILES+=usr/lib/libposix1e.a
OLD_FILES+=usr/lib/libposix1e.so
OLD_FILES+=usr/lib/libposix1e_p.a
OLD_FILES+=usr/lib/libskey.a
OLD_FILES+=usr/lib/libskey.so
OLD_FILES+=usr/lib/libskey_p.a
OLD_FILES+=usr/libexec/tradcpp0
OLD_FILES+=usr/libexec/cpp0
# 200304XX: removal of xten
OLD_FILES+=usr/libexec/xtend
OLD_FILES+=usr/sbin/xten
OLD_FILES+=usr/share/man/man1/xten.1.gz
OLD_FILES+=usr/share/man/man8/xtend.8.gz
# 200303XX
OLD_FILES+=usr/lib/libacl.so
OLD_FILES+=usr/lib/libdescrypt.so
OLD_FILES+=usr/lib/libf2c.so
OLD_FILES+=usr/lib/libg++.so
OLD_FILES+=usr/lib/libkdb.so
OLD_FILES+=usr/lib/librsaINTL.so
OLD_FILES+=usr/lib/libscrypt.so
OLD_FILES+=usr/lib/libss.so
# 200302XX
OLD_FILES+=usr/lib/libacl.a
OLD_FILES+=usr/lib/libacl_p.a
OLD_FILES+=usr/lib/libkadm.a
OLD_FILES+=usr/lib/libkadm.so
OLD_FILES+=usr/lib/libkadm_p.a
OLD_FILES+=usr/lib/libkafs.a
OLD_FILES+=usr/lib/libkafs.so
OLD_FILES+=usr/lib/libkafs_p.a
OLD_FILES+=usr/lib/libkdb.a
OLD_FILES+=usr/lib/libkdb_p.a
OLD_FILES+=usr/lib/libkrb.a
OLD_FILES+=usr/lib/libkrb.so
OLD_FILES+=usr/lib/libkrb_p.a
OLD_FILES+=usr/share/man/man3/SSL_CIPHER_get_name.3.gz
OLD_FILES+=usr/share/man/man3/SSL_COMP_add_compression_method.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_add_extra_chain_cert.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_add_session.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_ctrl.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_flush_sessions.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_free.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_get_verify_mode.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_load_verify_locations.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_new.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_sess_number.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_sess_set_cache_size.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_sess_set_get_cb.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_sessions.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_cert_store.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_cert_verify_callback.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_cipher_list.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_client_CA_list.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_client_cert_cb.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_default_passwd_cb.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_generate_session_id.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_info_callback.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_max_cert_list.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_mode.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_msg_callback.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_options.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_quiet_shutdown.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_session_cache_mode.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_session_id_context.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_ssl_version.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_timeout.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_tmp_dh_callback.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_tmp_rsa_callback.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_set_verify.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_use_certificate.3.gz
OLD_FILES+=usr/share/man/man3/SSL_SESSION_free.3.gz
OLD_FILES+=usr/share/man/man3/SSL_SESSION_get_ex_new_index.3.gz
OLD_FILES+=usr/share/man/man3/SSL_SESSION_get_time.3.gz
OLD_FILES+=usr/share/man/man3/SSL_accept.3.gz
OLD_FILES+=usr/share/man/man3/SSL_alert_type_string.3.gz
OLD_FILES+=usr/share/man/man3/SSL_clear.3.gz
OLD_FILES+=usr/share/man/man3/SSL_connect.3.gz
OLD_FILES+=usr/share/man/man3/SSL_do_handshake.3.gz
OLD_FILES+=usr/share/man/man3/SSL_free.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_SSL_CTX.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_ciphers.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_client_CA_list.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_current_cipher.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_default_timeout.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_error.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_ex_data_X509_STORE_CTX_idx.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_ex_new_index.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_fd.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_peer_cert_chain.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_peer_certificate.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_rbio.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_session.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_verify_result.3.gz
OLD_FILES+=usr/share/man/man3/SSL_get_version.3.gz
OLD_FILES+=usr/share/man/man3/SSL_library_init.3.gz
OLD_FILES+=usr/share/man/man3/SSL_load_client_CA_file.3.gz
OLD_FILES+=usr/share/man/man3/SSL_new.3.gz
OLD_FILES+=usr/share/man/man3/SSL_pending.3.gz
OLD_FILES+=usr/share/man/man3/SSL_read.3.gz
OLD_FILES+=usr/share/man/man3/SSL_rstate_string.3.gz
OLD_FILES+=usr/share/man/man3/SSL_session_reused.3.gz
OLD_FILES+=usr/share/man/man3/SSL_set_bio.3.gz
OLD_FILES+=usr/share/man/man3/SSL_set_connect_state.3.gz
OLD_FILES+=usr/share/man/man3/SSL_set_fd.3.gz
OLD_FILES+=usr/share/man/man3/SSL_set_session.3.gz
OLD_FILES+=usr/share/man/man3/SSL_set_shutdown.3.gz
OLD_FILES+=usr/share/man/man3/SSL_set_verify_result.3.gz
OLD_FILES+=usr/share/man/man3/SSL_shutdown.3.gz
OLD_FILES+=usr/share/man/man3/SSL_state_string.3.gz
OLD_FILES+=usr/share/man/man3/SSL_want.3.gz
OLD_FILES+=usr/share/man/man3/SSL_write.3.gz
OLD_FILES+=usr/share/man/man3/d2i_SSL_SESSION.3.gz
# 200301XX
OLD_FILES+=usr/share/man/man3/des_3cbc_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_3ecb_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_cbc_cksum.3.gz
OLD_FILES+=usr/share/man/man3/des_cbc_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_cfb_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_ecb_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_enc_read.3.gz
OLD_FILES+=usr/share/man/man3/des_enc_write.3.gz
OLD_FILES+=usr/share/man/man3/des_is_weak_key.3.gz
OLD_FILES+=usr/share/man/man3/des_key_sched.3.gz
OLD_FILES+=usr/share/man/man3/des_ofb_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_pcbc_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/des_quad_cksum.3.gz
OLD_FILES+=usr/share/man/man3/des_random_key.3.gz
OLD_FILES+=usr/share/man/man3/des_read_2password.3.gz
OLD_FILES+=usr/share/man/man3/des_read_password.3.gz
OLD_FILES+=usr/share/man/man3/des_read_pw_string.3.gz
OLD_FILES+=usr/share/man/man3/des_set_key.3.gz
OLD_FILES+=usr/share/man/man3/des_set_odd_parity.3.gz
OLD_FILES+=usr/share/man/man3/des_string_to_2key.3.gz
OLD_FILES+=usr/share/man/man3/des_string_to_key.3.gz
# 200212XX
OLD_FILES+=usr/sbin/kenv
OLD_FILES+=usr/bin/kenv
OLD_FILES+=usr/sbin/elf2aout
# 200210XX
OLD_FILES+=usr/include/libusbhid.h
OLD_FILES+=usr/share/man/man3/All_FreeBSD.3.gz
OLD_FILES+=usr/share/man/man3/CheckRules.3.gz
OLD_FILES+=usr/share/man/man3/ChunkCanBeRoot.3.gz
OLD_FILES+=usr/share/man/man3/Clone_Disk.3.gz
OLD_FILES+=usr/share/man/man3/Collapse_Chunk.3.gz
OLD_FILES+=usr/share/man/man3/Collapse_Disk.3.gz
OLD_FILES+=usr/share/man/man3/Create_Chunk.3.gz
OLD_FILES+=usr/share/man/man3/Create_Chunk_DWIM.3.gz
OLD_FILES+=usr/share/man/man3/Cyl_Aligned.3.gz
OLD_FILES+=usr/share/man/man3/Debug_Disk.3.gz
OLD_FILES+=usr/share/man/man3/Delete_Chunk.3.gz
OLD_FILES+=usr/share/man/man3/Disk_Names.3.gz
OLD_FILES+=usr/share/man/man3/Free_Disk.3.gz
OLD_FILES+=usr/share/man/man3/MakeDev.3.gz
OLD_FILES+=usr/share/man/man3/MakeDevDisk.3.gz
OLD_FILES+=usr/share/man/man3/Next_Cyl_Aligned.3.gz
OLD_FILES+=usr/share/man/man3/Next_Track_Aligned.3.gz
OLD_FILES+=usr/share/man/man3/Open_Disk.3.gz
OLD_FILES+=usr/share/man/man3/Prev_Cyl_Aligned.3.gz
OLD_FILES+=usr/share/man/man3/Prev_Track_Aligned.3.gz
OLD_FILES+=usr/share/man/man3/Set_Bios_Geom.3.gz
OLD_FILES+=usr/share/man/man3/Set_Boot_Blocks.3.gz
OLD_FILES+=usr/share/man/man3/Set_Boot_Mgr.3.gz
OLD_FILES+=usr/share/man/man3/ShowChunkFlags.3.gz
OLD_FILES+=usr/share/man/man3/Track_Aligned.3.gz
OLD_FILES+=usr/share/man/man3/Write_Disk.3.gz
OLD_FILES+=usr/share/man/man3/slice_type_name.3.gz
# 200210XX: most games moved to ports
OLD_FILES+=usr/share/man/man6/adventure.6.gz
OLD_FILES+=usr/share/man/man6/arithmetic.6.gz
OLD_FILES+=usr/share/man/man6/atc.6.gz
OLD_FILES+=usr/share/man/man6/backgammon.6.gz
OLD_FILES+=usr/share/man/man6/battlestar.6.gz
OLD_FILES+=usr/share/man/man6/bs.6.gz
OLD_FILES+=usr/share/man/man6/canfield.6.gz
OLD_FILES+=usr/share/man/man6/cfscores.6.gz
OLD_FILES+=usr/share/man/man6/cribbage.6.gz
OLD_FILES+=usr/share/man/man6/fish.6.gz
OLD_FILES+=usr/share/man/man6/hack.6.gz
OLD_FILES+=usr/share/man/man6/hangman.6.gz
OLD_FILES+=usr/share/man/man6/larn.6.gz
OLD_FILES+=usr/share/man/man6/mille.6.gz
OLD_FILES+=usr/share/man/man6/phantasia.6.gz
OLD_FILES+=usr/share/man/man6/piano.6.gz
OLD_FILES+=usr/share/man/man6/pig.6.gz
OLD_FILES+=usr/share/man/man6/quiz.6.gz
OLD_FILES+=usr/share/man/man6/rain.6.gz
OLD_FILES+=usr/share/man/man6/robots.6.gz
OLD_FILES+=usr/share/man/man6/rogue.6.gz
OLD_FILES+=usr/share/man/man6/sail.6.gz
OLD_FILES+=usr/share/man/man6/snake.6.gz
OLD_FILES+=usr/share/man/man6/snscore.6.gz
OLD_FILES+=usr/share/man/man6/trek.6.gz
OLD_FILES+=usr/share/man/man6/wargames.6.gz
OLD_FILES+=usr/share/man/man6/worm.6.gz
OLD_FILES+=usr/share/man/man6/worms.6.gz
OLD_FILES+=usr/share/man/man6/wump.6.gz
# 200207XX
OLD_FILES+=usr/share/man/man1aout/ar.1aout.gz
OLD_FILES+=usr/share/man/man1aout/as.1aout.gz
OLD_FILES+=usr/share/man/man1aout/ld.1aout.gz
OLD_FILES+=usr/share/man/man1aout/nm.1aout.gz
OLD_FILES+=usr/share/man/man1aout/ranlib.1aout.gz
OLD_FILES+=usr/share/man/man1aout/size.1aout.gz
OLD_FILES+=usr/share/man/man1aout/strings.1aout.gz
OLD_FILES+=usr/share/man/man1aout/strip.1aout.gz
OLD_FILES+=bin/mountd
OLD_FILES+=bin/nfsd
# 20020707: sbin/nfsd -> usr.sbin/nfsd
OLD_FILES+=sbin/nfsd
# 200206XX
OLD_FILES+=usr/lib/libpam_ssh.a
OLD_FILES+=usr/lib/libpam_ssh_p.a
OLD_FILES+=usr/bin/help
OLD_FILES+=usr/bin/sccs
.if ${TARGET_ARCH} != "amd64" && ${TARGET} != "arm" && ${TARGET_ARCH} != "i386" && ${TARGET} != "powerpc"
OLD_FILES+=usr/bin/gdbserver
.endif
OLD_FILES+=usr/bin/ssh-keysign
OLD_FILES+=usr/sbin/gifconfig
OLD_FILES+=usr/sbin/prefix
# 200205XX
OLD_FILES+=usr/bin/doscmd
# 200204XX
OLD_FILES+=usr/bin/a2p
OLD_FILES+=usr/bin/ptx
OLD_FILES+=usr/bin/pod2text
OLD_FILES+=usr/bin/pod2man
OLD_FILES+=usr/bin/pod2latex
OLD_FILES+=usr/bin/pod2html
OLD_FILES+=usr/bin/h2ph
OLD_FILES+=usr/bin/dprofpp
OLD_FILES+=usr/bin/c2ph
OLD_FILES+=usr/bin/h2xs
OLD_FILES+=usr/bin/pl2pm
OLD_FILES+=usr/bin/splain
OLD_FILES+=usr/bin/s2p
OLD_FILES+=usr/bin/find2perl
OLD_FILES+=usr/sbin/pkg_update
OLD_FILES+=usr/sbin/scriptdump
# 20020409: GC kget(1); userconfig is long dead.
OLD_FILES+=sbin/kget
OLD_FILES+=usr/share/man/man8/kget.8.gz
# 200203XX
OLD_FILES+=usr/lib/libss.a
OLD_FILES+=usr/lib/libss_p.a
OLD_FILES+=usr/lib/libtelnet.a
OLD_FILES+=usr/lib/libtelnet_p.a
OLD_FILES+=usr/sbin/diskpart
# 200202XX
OLD_FILES+=usr/bin/gprof4
# 200201XX
OLD_FILES+=usr/sbin/linux
# 2001XXXX
OLD_FILES+=usr/bin/joy
OLD_FILES+=usr/sbin/ibcs2
OLD_FILES+=usr/sbin/svr4
OLD_FILES+=usr/bin/chflags
OLD_FILES+=usr/sbin/uuconv
OLD_FILES+=usr/sbin/uuchk
OLD_FILES+=usr/sbin/portmap
OLD_FILES+=usr/sbin/pmap_set
OLD_FILES+=usr/sbin/pmap_dump
OLD_FILES+=usr/sbin/mcon
OLD_FILES+=usr/sbin/stlstty
OLD_FILES+=usr/sbin/ispppcontrol
OLD_FILES+=usr/sbin/rndcontrol
# 20011001: UUCP migration to ports
OLD_FILES+=usr/bin/uucp
OLD_FILES+=usr/bin/uulog
OLD_FILES+=usr/bin/uuname
OLD_FILES+=usr/bin/uupick
OLD_FILES+=usr/bin/uusched
OLD_FILES+=usr/bin/uustat
OLD_FILES+=usr/bin/uuto
OLD_FILES+=usr/bin/uux
OLD_FILES+=usr/libexec/uucp/uucico
OLD_FILES+=usr/libexec/uucp/uuxqt
OLD_FILES+=usr/libexec/uucpd
OLD_FILES+=usr/share/man/man1/uuconv.1.gz
OLD_FILES+=usr/share/man/man1/uucp.1.gz
OLD_FILES+=usr/share/man/man1/uulog.1.gz
OLD_FILES+=usr/share/man/man1/uuname.1.gz
OLD_FILES+=usr/share/man/man1/uupick.1.gz
OLD_FILES+=usr/share/man/man1/uustat.1.gz
OLD_FILES+=usr/share/man/man1/uuto.1.gz
OLD_FILES+=usr/share/man/man1/uux.1.gz
OLD_FILES+=usr/share/man/man8/uuchk.8.gz
OLD_FILES+=usr/share/man/man8/uucico.8.gz
OLD_FILES+=usr/share/man/man8/uucpd.8.gz
OLD_FILES+=usr/share/man/man8/uusched.8.gz
OLD_FILES+=usr/share/man/man8/uuxqt.8.gz
# 20010523: mount_portal -> mount_portalfs
OLD_FILES+=sbin/mount_portal
OLD_FILES+=usr/share/man/man8/mount_portal.8.gz
# 200104XX
OLD_FILES+=usr/lib/libdescrypt.a
OLD_FILES+=usr/lib/libscrypt.a
OLD_FILES+=usr/lib/libscrypt_p.a
OLD_FILES+=usr/sbin/pim6stat
OLD_FILES+=usr/sbin/pim6sd
OLD_FILES+=usr/sbin/pim6dd
# 20010217
OLD_FILES+=usr/share/doc/bind/misc/dns-setup
# 20001200
OLD_FILES+=usr/lib/libgcc_r_pic.a
# 200009XX
OLD_FILES+=usr/lib/libRSAglue.a
OLD_FILES+=usr/lib/libRSAglue.so
OLD_FILES+=usr/lib/librsaINTL.a
OLD_FILES+=usr/lib/librsaUSA.a
OLD_FILES+=usr/lib/librsaUSA.so
# 200002XX ?
OLD_FILES+=usr/lib/libf2c.a
OLD_FILES+=usr/lib/libf2c_p.a
OLD_FILES+=usr/lib/libg++.a
OLD_FILES+=usr/lib/libg++_p.a
# 20001006
OLD_FILES+=usr/bin/miniperl
# 20000810
OLD_FILES+=usr/bin/sperl
# 200001XX
OLD_FILES+=usr/sbin/apmconf
# 199911XX
OLD_FILES+=usr/sbin/ipfstat
OLD_FILES+=usr/sbin/ipmon
OLD_FILES+=usr/sbin/ipnat
OLD_FILES+=usr/sbin/bad144
OLD_FILES+=usr/sbin/wormcontrol
OLD_FILES+=usr/sbin/named-bootconf
OLD_FILES+=usr/sbin/kvm_mkdb
OLD_FILES+=usr/sbin/keyadmin
# 199909XX
OLD_FILES+=usr/lib/libdescrypt_p.a
OLD_FILES+=sbin/ft
# 199903XX
OLD_FILES+=sbin/modload
OLD_FILES+=sbin/modunload
OLD_FILES+=usr/sbin/natd
# 199812XX
OLD_FILES+=sbin/dset
# 199809XX
OLD_FILES+=sbin/scsi
OLD_FILES+=sbin/scsiformat
OLD_FILES+=usr/sbin/ncrcontrol
OLD_FILES+=usr/sbin/tickadj
# 199806XX
OLD_FILES+=usr/sbin/mkdosfs
# 199801XX
OLD_FILES+=sbin/mount_lfs
OLD_FILES+=sbin/newlfs
OLD_FILES+=sbin/dumplfs
OLD_FILES+=usr/sbin/qcamcontrol
OLD_FILES+=usr/sbin/supscan
# 1997XXXX
OLD_FILES+=usr/sbin/sysctl
OLD_FILES+=usr/sbin/ctm_scan
OLD_FILES+=usr/sbin/addgroup
OLD_FILES+=usr/sbin/rmgroup
# 1996XXXX
OLD_FILES+=sbin/rdisc
OLD_FILES+=usr/sbin/cdplay
OLD_FILES+=usr/sbin/supfilesrv
OLD_FILES+=usr/sbin/routed
OLD_FILES+=usr/sbin/lsdev
OLD_FILES+=usr/sbin/yppasswdd
## unsorted
# do we still support aout builds?
#OLD_FILES+=usr/lib/aout/c++rt0.o
#OLD_FILES+=usr/lib/aout/crt0.o
#OLD_FILES+=usr/lib/aout/gcrt0.o
#OLD_FILES+=usr/lib/aout/scrt0.o
#OLD_FILES+=usr/lib/aout/sgcrt0.o
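# One quick way to answer that question before re-enabling the entries
# above is to look for the a.out startup objects on an installed system
# (a sketch; the paths are taken from the commented lines above):
# ls /usr/lib/aout/c++rt0.o /usr/lib/aout/*crt0.o 2>/dev/null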
OLD_FILES+=usr/bin/sperl5
OLD_FILES+=usr/bin/perl5.6.0
OLD_FILES+=usr/bin/sperl5.6.0
OLD_FILES+=usr/bin/perlbc
OLD_FILES+=usr/bin/perl5.00503
OLD_FILES+=usr/bin/sperl5.00503
OLD_FILES+=usr/bin/perlbug
OLD_FILES+=usr/bin/perlcc
OLD_FILES+=usr/bin/perldoc
OLD_FILES+=usr/bin/suidperl
OLD_FILES+=usr/lib/pam_ftp.so
OLD_FILES+=usr/libdata/perl/5.00503/CGI/Apache.pm
OLD_FILES+=usr/libdata/perl/5.00503/CGI/Carp.pm
OLD_FILES+=usr/libdata/perl/5.00503/CGI/Cookie.pm
OLD_FILES+=usr/libdata/perl/5.00503/CGI/Fast.pm
OLD_FILES+=usr/libdata/perl/5.00503/CGI/Push.pm
OLD_FILES+=usr/libdata/perl/5.00503/CGI/Switch.pm
OLD_FILES+=usr/libdata/perl/5.00503/CPAN/FirstTime.pm
OLD_FILES+=usr/libdata/perl/5.00503/CPAN/Nox.pm
OLD_FILES+=usr/libdata/perl/5.00503/Class/Struct.pm
OLD_FILES+=usr/libdata/perl/5.00503/Devel/SelfStubber.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Command.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Embed.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Install.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Installed.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Liblist.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/MM_OS2.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/MM_Unix.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/MM_VMS.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/MM_Win32.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/MakeMaker.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Manifest.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Mkbootstrap.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Mksymlists.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/Packlist.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/inst
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/testlib.pm
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/typemap
OLD_FILES+=usr/libdata/perl/5.00503/ExtUtils/xsubpp
OLD_FILES+=usr/libdata/perl/5.00503/File/Spec/Mac.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Spec/OS2.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Spec/Unix.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Spec/VMS.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Spec/Win32.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Basename.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/CheckTree.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Compare.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Copy.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/DosGlob.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Find.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Path.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/Spec.pm
OLD_FILES+=usr/libdata/perl/5.00503/File/stat.pm
OLD_FILES+=usr/libdata/perl/5.00503/Getopt/Long.pm
OLD_FILES+=usr/libdata/perl/5.00503/Getopt/Std.pm
OLD_FILES+=usr/libdata/perl/5.00503/I18N/Collate.pm
OLD_FILES+=usr/libdata/perl/5.00503/IPC/Open2.pm
OLD_FILES+=usr/libdata/perl/5.00503/IPC/Open3.pm
OLD_FILES+=usr/libdata/perl/5.00503/Math/BigFloat.pm
OLD_FILES+=usr/libdata/perl/5.00503/Math/BigInt.pm
OLD_FILES+=usr/libdata/perl/5.00503/Math/Complex.pm
OLD_FILES+=usr/libdata/perl/5.00503/Math/Trig.pm
OLD_FILES+=usr/libdata/perl/5.00503/Net/Ping.pm
OLD_FILES+=usr/libdata/perl/5.00503/Net/hostent.pm
OLD_FILES+=usr/libdata/perl/5.00503/Net/netent.pm
OLD_FILES+=usr/libdata/perl/5.00503/Net/protoent.pm
OLD_FILES+=usr/libdata/perl/5.00503/Net/servent.pm
OLD_FILES+=usr/libdata/perl/5.00503/Pod/Functions.pm
OLD_FILES+=usr/libdata/perl/5.00503/Pod/Html.pm
OLD_FILES+=usr/libdata/perl/5.00503/Pod/Text.pm
OLD_FILES+=usr/libdata/perl/5.00503/Search/Dict.pm
OLD_FILES+=usr/libdata/perl/5.00503/Sys/Hostname.pm
OLD_FILES+=usr/libdata/perl/5.00503/Sys/Syslog.pm
OLD_FILES+=usr/libdata/perl/5.00503/Term/Cap.pm
OLD_FILES+=usr/libdata/perl/5.00503/Term/Complete.pm
OLD_FILES+=usr/libdata/perl/5.00503/Term/ReadLine.pm
OLD_FILES+=usr/libdata/perl/5.00503/Test/Harness.pm
OLD_FILES+=usr/libdata/perl/5.00503/Text/Abbrev.pm
OLD_FILES+=usr/libdata/perl/5.00503/Text/ParseWords.pm
OLD_FILES+=usr/libdata/perl/5.00503/Text/Soundex.pm
OLD_FILES+=usr/libdata/perl/5.00503/Text/Tabs.pm
OLD_FILES+=usr/libdata/perl/5.00503/Text/Wrap.pm
OLD_FILES+=usr/libdata/perl/5.00503/Tie/Array.pm
OLD_FILES+=usr/libdata/perl/5.00503/Tie/Handle.pm
OLD_FILES+=usr/libdata/perl/5.00503/Tie/Hash.pm
OLD_FILES+=usr/libdata/perl/5.00503/Tie/RefHash.pm
OLD_FILES+=usr/libdata/perl/5.00503/Tie/Scalar.pm
OLD_FILES+=usr/libdata/perl/5.00503/Tie/SubstrHash.pm
OLD_FILES+=usr/libdata/perl/5.00503/Time/Local.pm
OLD_FILES+=usr/libdata/perl/5.00503/Time/gmtime.pm
OLD_FILES+=usr/libdata/perl/5.00503/Time/localtime.pm
OLD_FILES+=usr/libdata/perl/5.00503/Time/tm.pm
OLD_FILES+=usr/libdata/perl/5.00503/User/grent.pm
OLD_FILES+=usr/libdata/perl/5.00503/User/pwent.pm
OLD_FILES+=usr/libdata/perl/5.00503/auto/Getopt/Long/GetOptions.al
OLD_FILES+=usr/libdata/perl/5.00503/auto/Getopt/Long/FindOption.al
OLD_FILES+=usr/libdata/perl/5.00503/auto/Getopt/Long/Configure.al
OLD_FILES+=usr/libdata/perl/5.00503/auto/Getopt/Long/config.al
OLD_FILES+=usr/libdata/perl/5.00503/auto/Getopt/Long/Croak.al
OLD_FILES+=usr/libdata/perl/5.00503/auto/Getopt/Long/autosplit.ix
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Deparse.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/CC.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Debug.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Showlex.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/makeliblinks
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Bblock.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/cc_harness
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Bytecode.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Stackobj.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Xref.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Lint.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Asmdata.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Assembler.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Disassembler.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/disassemble
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/assemble
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/Terse.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B/C.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/EXTERN.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/INTERN.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/XSUB.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/XSlock.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/av.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/bytecode.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/byterun.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/cc_runtime.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/config.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/cop.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/cv.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/dosish.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/embed.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/embedvar.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/fakethr.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/form.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/gv.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/handy.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/hv.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/intrpvar.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/iperlsys.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/keywords.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/mg.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/nostdio.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/objXSUB.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/objpp.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/op.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/opcode.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/patchlevel.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/perl.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/perlio.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/perlsdio.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/perlsfio.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/perlvars.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/perly.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/pp.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/pp_proto.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/proto.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/regcomp.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/regexp.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/regnodes.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/scope.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/sv.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/thrdvar.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/thread.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/unixish.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/CORE/util.h
OLD_FILES+=usr/libdata/perl/5.00503/mach/Data/Dumper.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO/File.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO/Select.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO/Socket.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO/Handle.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO/Seekable.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO/Pipe.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IPC/SysV.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IPC/Msg.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IPC/Semaphore.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/B/B.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/B/B.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/B/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DB_File/autosplit.ix
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DB_File/DB_File.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DB_File/DB_File.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DB_File/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Data/Dumper/Dumper.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Data/Dumper/Dumper.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Data/Dumper/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/.exists
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/dl_findfile.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/dl_expandspec.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/dl_find_symbol_anywhere.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/autosplit.ix
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/DynaLoader.a
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/DynaLoader/extralibs.ld
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Fcntl/Fcntl.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Fcntl/Fcntl.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Fcntl/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/IO/IO.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/IO/IO.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/IO/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/IPC/SysV/SysV.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/IPC/SysV/SysV.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/IPC/SysV/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/NDBM_File/NDBM_File.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/NDBM_File/NDBM_File.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/NDBM_File/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Opcode/Opcode.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Opcode/Opcode.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Opcode/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/assert.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/tolower.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/toupper.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/closedir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/opendir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/readdir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/rewinddir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/errno.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/creat.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fcntl.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getgrgid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getgrnam.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/atan2.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/cos.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/exp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fabs.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/log.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/pow.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/sin.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/sqrt.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getpwnam.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getpwuid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/longjmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/setjmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/kill.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/feof.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/siglongjmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/sigsetjmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/raise.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/offsetof.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/clearerr.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fclose.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fdopen.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fgetc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fgets.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fileno.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fopen.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fprintf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fputc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fputs.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fread.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/freopen.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fscanf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fseek.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/ferror.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fflush.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fgetpos.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fsetpos.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/ftell.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fwrite.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getchar.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/gets.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/perror.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/printf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/putc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/putchar.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/puts.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/remove.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/rename.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/rewind.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/scanf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/sprintf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/sscanf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/tmpfile.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/ungetc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/vfprintf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/vprintf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/vsprintf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/abs.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/atexit.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/atof.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/atoi.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/atol.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/bsearch.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/calloc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/div.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/exit.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/free.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getenv.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/labs.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/ldiv.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/malloc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/qsort.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/rand.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/realloc.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/srand.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/system.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/memchr.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/memcmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/memcpy.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/memmove.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/memset.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strcat.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strchr.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strcmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strcpy.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strcspn.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strerror.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strlen.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strncat.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strncmp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strncpy.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strpbrk.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strrchr.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strspn.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strstr.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/strtok.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/chmod.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fstat.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/mkdir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/stat.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/umask.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/wait.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/waitpid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/gmtime.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/localtime.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/time.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/alarm.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/chdir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/chown.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/execl.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/execle.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/execlp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/execv.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/execve.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/execvp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/fork.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getcwd.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getegid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/geteuid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getgid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getgroups.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getlogin.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getpgrp.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getpid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getppid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/getuid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/isatty.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/link.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/rmdir.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/setbuf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/setgid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/setuid.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/setvbuf.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/sleep.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/unlink.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/utime.al
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/autosplit.ix
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/POSIX.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/POSIX.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/POSIX/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/SDBM_File/SDBM_File.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/SDBM_File/SDBM_File.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/SDBM_File/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Socket/Socket.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Socket/Socket.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Socket/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/attrs/attrs.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/attrs/attrs.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/attrs/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/re/re.so
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/re/re.bs
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/re/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/sdbm/extralibs.ld
OLD_FILES+=usr/libdata/perl/5.00503/mach/auto/Errno/.packlist
OLD_FILES+=usr/libdata/perl/5.00503/mach/Config.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/B.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/O.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/perllocal.pod
OLD_FILES+=usr/libdata/perl/5.00503/mach/DB_File.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/Errno.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/Fcntl.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/IO.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/NDBM_File.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/Safe.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/Opcode.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/ops.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/POSIX.pod
OLD_FILES+=usr/libdata/perl/5.00503/mach/POSIX.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/SDBM_File.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/Socket.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/attrs.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/re.pm
OLD_FILES+=usr/libdata/perl/5.00503/mach/_h2ph_pre.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/a.out.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_ccb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_extend.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_periph.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_queue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_sim.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_xpt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_xpt_periph.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/cam/cam_xpt_sim.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/aio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/alias.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/assert.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/bitstring.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/calendar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/camlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/com_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/com_right.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ctype.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/curses.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/db.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/des.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/devstat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/dialog.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/dirent.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/disktab.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/dlfcn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/elf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/errno.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/eti.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/fcntl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/fetch.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/float.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/floatingpoint.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/fnmatch.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/form.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/fstab.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ftpio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/fts.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/glob.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/gmp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/gnuregex.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/grp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/histedit.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ieeefp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ifaddrs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/inttypes.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/iso646.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/kvm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/libatm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/libdisk.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/libgen.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/libusb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/libutil.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/limits.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/link.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/linker_set.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/locale.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/login_cap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/malloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/FlexLexer.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/PlotFile.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/SFile.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/_G_config.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/algo.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/algobase.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/alloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/builtinbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/bvector.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/complex.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/defalloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/deque.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/editbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/floatio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/fstream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/function.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/hash_map.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/hash_set.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/hashtable.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/heap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/indstream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/iolibio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/iomanip.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/list.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/iostdio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/iostream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/iostreamP.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/istream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/iterator.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/libio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/libioP.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/map.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/multimap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/multiset.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/new.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/ostream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/pair.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/parsestream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/pfstream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/procbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/pthread_alloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/rope.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/ropeimpl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/set.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/slist.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stack.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stdiostream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_algo.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/tree.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_algobase.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_alloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_bvector.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_config.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_construct.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_deque.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_function.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_hash_fun.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_hash_map.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_hash_set.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_hashtable.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_heap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_iterator.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_list.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_map.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_multimap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_multiset.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_numeric.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_pair.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_queue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_raw_storage_iter.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_relops.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_rope.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_set.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_slist.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_stack.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_tempbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_tree.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_uninitialized.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stl_vector.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/stream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/streambuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/strfile.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/strstream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/tempbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/type_traits.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g++/vector.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/math.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/md2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/md4.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/md5.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/memory.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/menu.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/mp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/mpool.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/mqueue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ncurses.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ndbm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netdb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nl_types.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nlist.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objformat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/opie.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/osreldate.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/panel.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/paths.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pcap-int.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pcap-namedb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pcap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/poll.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pthread.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pthread_np.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pwd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/radlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ranlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/regex.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/regexp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/resolv.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ripemd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rune.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/runetype.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sched.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/search.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/semaphore.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/setjmp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sgtty.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sha.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/signal.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/skey.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stab.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stand.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stdarg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stddef.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stdio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stdlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/strhash.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/string.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stringlist.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/strings.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/struct.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sysexits.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/syslog.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/taclib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/tar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/tcpd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/term.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/termcap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/termios.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/time.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/timers.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ttyent.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ucontext.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/unctrl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/unistd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/utime.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/utmp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/values.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/varargs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vgl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vis.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/zconf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/zlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/arpa/ftp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/arpa/inet.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/arpa/nameser.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/arpa/nameser_compat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/arpa/telnet.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/arpa/tftp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/assertions.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/ctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/dst.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/eventlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/heap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/irpmarshall.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/logging.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/memcluster.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/misc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/tree.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/isc/list.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ansi.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/apic.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/apm_bios.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/apm_segments.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/asc_ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/asm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/asmacros.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/asnames.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/atomic.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bootinfo.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus_at386.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus_memio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus_pc98.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus_pio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus_pio_ind.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/cdk.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/clock.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/comstats.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/console.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/cpu.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/cpufunc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/cputypes.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/cronyx.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/db_machdep.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/dvcfg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/elf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/endian.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/exec.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/float.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/floatingpoint.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/frame.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/globaldata.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/globals.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/gsc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_cause.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_rbch_ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_tel_ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_trace.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ieeefp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/if_wavelan_ieee.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/if_wl_wavelan.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/iic.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/in_cksum.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ioctl_bt848.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ioctl_ctx.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ioctl_fd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ioctl_meteor.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ipl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/joystick.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/limits.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/lock.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/md_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/mouse.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/mpapic.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/mtpr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/bus_dma.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/npx.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/param.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/pcaudioio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/pcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/pcb_ext.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/pcvt_ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/perfmon.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/physio_proc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/pmap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/proc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/profile.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/psl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ptrace.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/reg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/reloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/resource.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/segments.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/setjmp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/sigframe.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/signal.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/smb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/smp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/smptests.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/soundcard.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/speaker.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/specialreg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/spigot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/stdarg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/sysarch.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/trap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/tss.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/types.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/uc_device.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ucontext.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/ultrasound.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/varargs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/vm86.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/vmparam.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/wtio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/i4b_isppp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/machine/pci_cfgreg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/msdosfs/bootsect.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/msdosfs/bpb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/msdosfs/denode.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/msdosfs/direntry.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/msdosfs/fat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/msdosfs/msdosfsmount.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/bpf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/bpf_compat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/bpfdesc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/bridge.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/ethernet.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/hostcache.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_arp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_atm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_dl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_gif.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_ieee80211.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_llc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_media.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_mib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_ppp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_pppvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_slvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_sppp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_stf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_tap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_tapvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_tun.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/slip.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_tunvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_types.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_vlan_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/intrq.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/iso88025.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/net_osdep.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/netisr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/pfkeyv2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/ppp_comp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/ppp_defs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/radix.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/raw_cb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/route.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/slcompress.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/zlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_faith.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_arc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/net/if_gre.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/krpc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsdiskless.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsm_subs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsmount.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsnode.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsproto.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsrtt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsrvcache.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nfsv2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/nqnfs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/rpcv2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nfs/xdr_subs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/aarp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/at.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/at_extern.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/at_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/ddp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/ddp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/endian.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatalk/phase2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_cm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_if.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_pcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_sap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_sigmgr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_stack.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_sys.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/atm_vc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/kern_include.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/port.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netatm/queue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/netgraph.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_UI.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_async.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_bpf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_bridge.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_cisco.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_echo.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_ether.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_frame_relay.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_hole.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_iface.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_ksocket.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_lmi.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_message.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_mppc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_one2many.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_parse.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_ppp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_pppoe.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_pptpgre.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_rfc1490.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_sample.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_socket.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_socketvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_tee.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_tty.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_vjc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_eiface.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_etf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_device.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_l2tp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netgraph/ng_fec.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/icmp6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/icmp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/if_atm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/if_ether.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/if_fddi.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/igmp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/igmp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/in.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/in_gif.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/in_hostcache.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/in_pcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/in_systm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/in_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_auth.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_compat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_dummynet.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_ecn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_encap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_fil.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_flow.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_frag.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_fw.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_icmp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_mroute.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_nat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_proxy.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_state.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ipl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ipprotosw.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcp_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcp_fsm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcp_seq.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcp_timer.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/tcpip.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/udp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/udp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_fw2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet/ip_gre.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ah.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ah6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/esp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/esp6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/icmp6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/in6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/in6_gif.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/in6_ifattach.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/in6_pcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/in6_prefix.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/in6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ip6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ip6_ecn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ip6_fw.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ip6_mroute.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ip6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ip6protosw.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ipcomp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ipcomp6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ipsec.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/ipsec6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/mld6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/nd6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/pim6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/pim6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/scope6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/tcp6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/udp6_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/esp_rijndael.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netinet6/raw_ip6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/ipx.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/ipx_if.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/ipx_ip.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/ipx_pcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/ipx_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/spx.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/spx_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/spx_timer.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipx/spx_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netkey/key.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netkey/key_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netkey/key_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netkey/keydb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netkey/keysock.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netnatm/natm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_cfg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_conn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_file.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_lib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_ncp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_nls.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_rcfile.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_rq.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_sock.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_subr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/ncp_user.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netncp/nwerror.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/idp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/idp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/ns.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/ns_error.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/ns_if.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/ns_pcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/sp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/spidp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/spp_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/spp_timer.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netns/spp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfs_compr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfs_ihash.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfs_inode.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfs_subr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfs_vfsops.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ntfs/ntfsmount.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nwfs/nwfs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nwfs/nwfs_mount.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nwfs/nwfs_node.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/nwfs/nwfs_subr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/NXConstStr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/Object.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/Protocol.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/encoding.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/hash.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/objc-api.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/objc-list.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/objc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/runtime.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/sarray.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/thr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/objc/typedstream.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/asn1.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/asn1_mac.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/bio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/blowfish.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/bn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/buffer.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/cast.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/comp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/conf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/conf_api.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/crypto.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/des.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/dh.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/dsa.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/dso.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/e_os.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/e_os2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ebcdic.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/evp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hmac.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/lhash.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/md2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/md4.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/md5.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/mdc2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/obj_mac.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/objects.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/opensslconf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/opensslv.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/pem.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/pem2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/pkcs12.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/pkcs7.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/rand.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/rc2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/rc4.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/rc5.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ripemd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/rsa.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/safestack.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/sha.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ssl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ssl2.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ssl23.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ssl3.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ssl_locl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/stack.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/symhacks.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/tls1.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/tmdiff.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/txt_db.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/x509.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/x509_vfy.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/x509v3.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/idea.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/aes.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/asn1t.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/cryptlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/des_old.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ec.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/engine.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/krb5_asn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/kssl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ocsp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ossl_typ.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ui.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ui_compat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/aes_locl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/eng_int.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_4758_cca_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_aep_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_atalla_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_cswift_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_ncipher_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_nuron_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_sureware_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/hw_ubsec_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/openssl/ui_locl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/cardinfo.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/cis.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/driver.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/i82365.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/pccard_nbk.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/slot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/meciareg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/pcic_pci.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/pccard/pcicvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/posix4/aio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/posix4/mqueue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/posix4/posix4.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/posix4/sched.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/posix4/semaphore.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/protocols/dumprestore.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/protocols/routed.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/protocols/rwhod.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/protocols/talkd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/protocols/timed.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/chardefs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/history.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/keymaps.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/readline.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/rlconf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/rlstdc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readline/tilde.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/auth.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/auth_des.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/auth_unix.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/clnt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/des.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/des_crypt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/key_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/pmap_clnt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/pmap_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/pmap_rmt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/rpc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/rpc_com.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/rpc_msg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/svc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/svc_auth.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/types.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpc/xdr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/bootparam_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/crypt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/key_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/klm_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/mount.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nfs_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nis.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nis_cache.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nis_callback.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nis_db.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nis_tags.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nislib.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/nlm_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/rex.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/rnusers.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/rquota.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/rstat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/rwall.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/sm_inter.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/spray.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/yp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/yp_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/ypclnt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/yppasswd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/ypupdate_prot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/rpcsvc/ypxfrd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/_pam_compat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/_pam_macros.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/_pam_types.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/pam_appl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/pam_malloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/pam_misc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/pam_mod_misc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/security/pam_modules.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ss/mit-sipb-copyright.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ss/ss.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/ss/ss_err.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/_posix.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ata.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/acct.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/acl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/agpio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/aio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/assym.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/blist.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/buf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/bus.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/bus_private.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/callout.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ccdvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/cdefs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/cdio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/cdrio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/chio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/clist.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/endian.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/conf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/cons.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/consio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/copyright.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ctype.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/dir.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/dataacq.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/link_elf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/device_port.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/devicestat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/dirent.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/disk.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/disklabel.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/diskslice.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/dkstat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/dmap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/domain.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/dvdio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/elf32.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/elf64.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/elf_common.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/elf_generic.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/errno.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/event.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/eventhandler.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/eventvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/exec.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/extattr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/fbio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/fcntl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/file.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/filedesc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/filio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/gmon.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/imgact.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/imgact_aout.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/imgact_elf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/inflate.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/interrupt.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/inttypes.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ioccom.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ioctl_compat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ipc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/jail.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/joystick.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/kbio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/kernel.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/kthread.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ktrace.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/libkern.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/linker.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/linker_set.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/lock.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/lockf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/malloc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/mbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/md5.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/memrange.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/mman.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/module.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/mount.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/msg.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/msgbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/mtio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/namei.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/param.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/pciio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/pioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/pipe.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/poll.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/proc.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/procfs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/protosw.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ptio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ptrace.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/queue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/random.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/reboot.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/resource.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/resourcevar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/rman.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/rtprio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/sbuf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/select.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/sem.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/shm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/signal.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/signalvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/snoop.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/socket.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/socketvar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/sockio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/soundcard.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/stat.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/syscall-hide.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/syscall.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/sysctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/sysent.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/syslimits.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/syslog.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/sysproto.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/systm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/taskqueue.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/termios.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/time.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/timeb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/timepps.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/timers.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/times.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/timex.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/tprintf.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/tty.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ttychars.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ttycom.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ttydefaults.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ttydev.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/types.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ucontext.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/ucred.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/uio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/un.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/unistd.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/unpcb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/user.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/utsname.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/vmmeter.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/vnioctl.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/vnode.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/wait.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/wormio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/xrpuio.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/kobj.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/link_aout.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/nlist_aout.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/mchain.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/fnv_hash.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/iconv.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/sys/md4.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/pmap.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/swap_pager.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_extern.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_kern.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_map.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_object.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_page.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_pageout.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_pager.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_param.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vm_zone.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/vm/vnode_pager.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/complex.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/stdbool.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/langinfo.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/netbios.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb_conn.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb_dev.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb_rq.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb_subr.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb_tran.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netsmb/smb_trantcp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/g2c.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/telnet.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/elf-hints.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/libusbhid.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/radlib_vs.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/readpassphrase.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/wchar.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/wctype.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/cast.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/castsb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/cryptodev.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/cryptosoft.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/deflate.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/rijndael.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/rmd160.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/skipjack.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/crypto/xform.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ah.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ah_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/esp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/esp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ipcomp.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ipcomp_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ipip_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ipsec.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/ipsec6.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/key.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/key_debug.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/key_var.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/keydb.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/keysock.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/netipsec/xform.ph
OLD_FILES+=usr/libdata/perl/5.00503/mach/bzlib.ph
OLD_FILES+=usr/libdata/perl/5.00503/pod/perl.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perl5004delta.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlapio.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlbook.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlbot.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlcall.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perldata.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perldebug.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perldelta.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perldiag.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perldsc.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlembed.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq1.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq2.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq3.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq4.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq5.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq6.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq7.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlipc.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq8.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfaq9.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlform.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlfunc.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlguts.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlhist.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perllocale.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perllol.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlmod.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlmodinstall.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlmodlib.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlobj.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlop.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlopentut.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlpod.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlport.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlre.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlref.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlreftut.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlrun.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlsec.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlstyle.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlsub.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlsyn.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlthrtut.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perltie.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perltoc.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perltoot.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perltrap.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlvar.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlxs.pod
OLD_FILES+=usr/libdata/perl/5.00503/pod/perlxstut.pod
OLD_FILES+=usr/libdata/perl/5.00503/AnyDBM_File.pm
OLD_FILES+=usr/libdata/perl/5.00503/AutoLoader.pm
OLD_FILES+=usr/libdata/perl/5.00503/AutoSplit.pm
OLD_FILES+=usr/libdata/perl/5.00503/Benchmark.pm
OLD_FILES+=usr/libdata/perl/5.00503/CGI.pm
OLD_FILES+=usr/libdata/perl/5.00503/CPAN.pm
OLD_FILES+=usr/libdata/perl/5.00503/Carp.pm
OLD_FILES+=usr/libdata/perl/5.00503/Cwd.pm
OLD_FILES+=usr/libdata/perl/5.00503/DirHandle.pm
OLD_FILES+=usr/libdata/perl/5.00503/Dumpvalue.pm
OLD_FILES+=usr/libdata/perl/5.00503/English.pm
OLD_FILES+=usr/libdata/perl/5.00503/Env.pm
OLD_FILES+=usr/libdata/perl/5.00503/Exporter.pm
OLD_FILES+=usr/libdata/perl/5.00503/Fatal.pm
OLD_FILES+=usr/libdata/perl/5.00503/FileCache.pm
OLD_FILES+=usr/libdata/perl/5.00503/FileHandle.pm
OLD_FILES+=usr/libdata/perl/5.00503/FindBin.pm
OLD_FILES+=usr/libdata/perl/5.00503/SelectSaver.pm
OLD_FILES+=usr/libdata/perl/5.00503/SelfLoader.pm
OLD_FILES+=usr/libdata/perl/5.00503/Shell.pm
OLD_FILES+=usr/libdata/perl/5.00503/Symbol.pm
OLD_FILES+=usr/libdata/perl/5.00503/Test.pm
OLD_FILES+=usr/libdata/perl/5.00503/UNIVERSAL.pm
OLD_FILES+=usr/libdata/perl/5.00503/abbrev.pl
OLD_FILES+=usr/libdata/perl/5.00503/assert.pl
OLD_FILES+=usr/libdata/perl/5.00503/autouse.pm
OLD_FILES+=usr/libdata/perl/5.00503/base.pm
OLD_FILES+=usr/libdata/perl/5.00503/bigfloat.pl
OLD_FILES+=usr/libdata/perl/5.00503/bigint.pl
OLD_FILES+=usr/libdata/perl/5.00503/bigrat.pl
OLD_FILES+=usr/libdata/perl/5.00503/blib.pm
OLD_FILES+=usr/libdata/perl/5.00503/cacheout.pl
OLD_FILES+=usr/libdata/perl/5.00503/chat2.pl
OLD_FILES+=usr/libdata/perl/5.00503/complete.pl
OLD_FILES+=usr/libdata/perl/5.00503/constant.pm
OLD_FILES+=usr/libdata/perl/5.00503/ctime.pl
OLD_FILES+=usr/libdata/perl/5.00503/diagnostics.pm
OLD_FILES+=usr/libdata/perl/5.00503/dotsh.pl
OLD_FILES+=usr/libdata/perl/5.00503/dumpvar.pl
OLD_FILES+=usr/libdata/perl/5.00503/exceptions.pl
OLD_FILES+=usr/libdata/perl/5.00503/fastcwd.pl
OLD_FILES+=usr/libdata/perl/5.00503/fields.pm
OLD_FILES+=usr/libdata/perl/5.00503/find.pl
OLD_FILES+=usr/libdata/perl/5.00503/finddepth.pl
OLD_FILES+=usr/libdata/perl/5.00503/flush.pl
OLD_FILES+=usr/libdata/perl/5.00503/ftp.pl
OLD_FILES+=usr/libdata/perl/5.00503/getcwd.pl
OLD_FILES+=usr/libdata/perl/5.00503/getopt.pl
OLD_FILES+=usr/libdata/perl/5.00503/getopts.pl
OLD_FILES+=usr/libdata/perl/5.00503/hostname.pl
OLD_FILES+=usr/libdata/perl/5.00503/importenv.pl
OLD_FILES+=usr/libdata/perl/5.00503/integer.pm
OLD_FILES+=usr/libdata/perl/5.00503/less.pm
OLD_FILES+=usr/libdata/perl/5.00503/lib.pm
OLD_FILES+=usr/libdata/perl/5.00503/locale.pm
OLD_FILES+=usr/libdata/perl/5.00503/look.pl
OLD_FILES+=usr/libdata/perl/5.00503/newgetopt.pl
OLD_FILES+=usr/libdata/perl/5.00503/open2.pl
OLD_FILES+=usr/libdata/perl/5.00503/open3.pl
OLD_FILES+=usr/libdata/perl/5.00503/overload.pm
OLD_FILES+=usr/libdata/perl/5.00503/perl5db.pl
OLD_FILES+=usr/libdata/perl/5.00503/pwd.pl
OLD_FILES+=usr/libdata/perl/5.00503/shellwords.pl
OLD_FILES+=usr/libdata/perl/5.00503/sigtrap.pm
OLD_FILES+=usr/libdata/perl/5.00503/stat.pl
OLD_FILES+=usr/libdata/perl/5.00503/strict.pm
OLD_FILES+=usr/libdata/perl/5.00503/subs.pm
OLD_FILES+=usr/libdata/perl/5.00503/syslog.pl
OLD_FILES+=usr/libdata/perl/5.00503/tainted.pl
OLD_FILES+=usr/libdata/perl/5.00503/termcap.pl
OLD_FILES+=usr/libdata/perl/5.00503/timelocal.pl
OLD_FILES+=usr/libdata/perl/5.00503/validate.pl
OLD_FILES+=usr/libdata/perl/5.00503/vars.pm
OLD_FILES+=usr/libdata/perl/5.00503/re.pm
OLD_FILES+=usr/libdata/perl/5.00503/Config.pm
OLD_FILES+=usr/libdata/perl/5.00503/.exists
OLD_FILES+=usr/libdata/perl/5.00503/DynaLoader.pm
OLD_FILES+=usr/share/perl/man/man3/AnyDBM_File.3.gz
OLD_FILES+=usr/share/perl/man/man3/AutoLoader.3.gz
OLD_FILES+=usr/share/perl/man/man3/AutoSplit.3.gz
OLD_FILES+=usr/share/perl/man/man3/B.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Asmdata.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Assembler.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Bblock.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Bytecode.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::C.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::CC.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Debug.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Deparse.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Disassembler.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Lint.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Showlex.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Stackobj.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Terse.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Xref.3.gz
OLD_FILES+=usr/share/perl/man/man3/Benchmark.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Apache.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Carp.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Cookie.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Fast.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Push.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Switch.3.gz
OLD_FILES+=usr/share/perl/man/man3/CPAN.3.gz
OLD_FILES+=usr/share/perl/man/man3/CPAN::FirstTime.3.gz
OLD_FILES+=usr/share/perl/man/man3/CPAN::Nox.3.gz
OLD_FILES+=usr/share/perl/man/man3/Carp.3.gz
OLD_FILES+=usr/share/perl/man/man3/Class::Struct.3.gz
OLD_FILES+=usr/share/perl/man/man3/Config.3.gz
OLD_FILES+=usr/share/perl/man/man3/Cwd.3.gz
OLD_FILES+=usr/share/perl/man/man3/DB_File.3.gz
OLD_FILES+=usr/share/perl/man/man3/Data::Dumper.3.gz
OLD_FILES+=usr/share/perl/man/man3/Devel::SelfStubber.3.gz
OLD_FILES+=usr/share/perl/man/man3/DirHandle.3.gz
OLD_FILES+=usr/share/perl/man/man3/Dumpvalue.3.gz
OLD_FILES+=usr/share/perl/man/man3/DynaLoader.3.gz
OLD_FILES+=usr/share/perl/man/man3/English.3.gz
OLD_FILES+=usr/share/perl/man/man3/Env.3.gz
OLD_FILES+=usr/share/perl/man/man3/Exporter.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Command.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Embed.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Install.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Installed.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Liblist.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::MM_OS2.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::MM_Unix.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::MM_VMS.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::MM_Win32.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::MakeMaker.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Manifest.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Mkbootstrap.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Mksymlists.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::Packlist.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::testlib.3.gz
OLD_FILES+=usr/share/perl/man/man3/Fatal.3.gz
OLD_FILES+=usr/share/perl/man/man3/Fcntl.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Basename.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::CheckTree.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Compare.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Copy.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::DosGlob.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Find.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Path.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Spec.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Spec::Mac.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Spec::OS2.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Spec::Unix.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Spec::VMS.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Spec::Win32.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::stat.3.gz
OLD_FILES+=usr/share/perl/man/man3/FileCache.3.gz
OLD_FILES+=usr/share/perl/man/man3/FileHandle.3.gz
OLD_FILES+=usr/share/perl/man/man3/FindBin.3.gz
OLD_FILES+=usr/share/perl/man/man3/GDBM_File.3.gz
OLD_FILES+=usr/share/perl/man/man3/Getopt::Long.3.gz
OLD_FILES+=usr/share/perl/man/man3/Getopt::Std.3.gz
OLD_FILES+=usr/share/perl/man/man3/I18N::Collate.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::File.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Handle.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Pipe.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Seekable.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Select.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Socket.3.gz
OLD_FILES+=usr/share/perl/man/man3/IPC::Msg.3.gz
OLD_FILES+=usr/share/perl/man/man3/IPC::Open2.3.gz
OLD_FILES+=usr/share/perl/man/man3/IPC::Open3.3.gz
OLD_FILES+=usr/share/perl/man/man3/IPC::Semaphore.3.gz
OLD_FILES+=usr/share/perl/man/man3/IPC::SysV.3.gz
OLD_FILES+=usr/share/perl/man/man3/Math::BigFloat.3.gz
OLD_FILES+=usr/share/perl/man/man3/Math::BigInt.3.gz
OLD_FILES+=usr/share/perl/man/man3/Math::Complex.3.gz
OLD_FILES+=usr/share/perl/man/man3/Math::Trig.3.gz
OLD_FILES+=usr/share/perl/man/man3/NDBM_File.3.gz
OLD_FILES+=usr/share/perl/man/man3/Net::Ping.3.gz
OLD_FILES+=usr/share/perl/man/man3/Net::hostent.3.gz
OLD_FILES+=usr/share/perl/man/man3/Net::netent.3.gz
OLD_FILES+=usr/share/perl/man/man3/Net::protoent.3.gz
OLD_FILES+=usr/share/perl/man/man3/Net::servent.3.gz
OLD_FILES+=usr/share/perl/man/man3/O.3.gz
OLD_FILES+=usr/share/perl/man/man3/ODBM_File.3.gz
OLD_FILES+=usr/share/perl/man/man3/Opcode.3.gz
OLD_FILES+=usr/share/perl/man/man3/POSIX.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Html.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Text.3.gz
OLD_FILES+=usr/share/perl/man/man3/SDBM_File.3.gz
OLD_FILES+=usr/share/perl/man/man3/Safe.3.gz
OLD_FILES+=usr/share/perl/man/man3/Search::Dict.3.gz
OLD_FILES+=usr/share/perl/man/man3/SelectSaver.3.gz
OLD_FILES+=usr/share/perl/man/man3/SelfLoader.3.gz
OLD_FILES+=usr/share/perl/man/man3/Shell.3.gz
OLD_FILES+=usr/share/perl/man/man3/Socket.3.gz
OLD_FILES+=usr/share/perl/man/man3/Symbol.3.gz
OLD_FILES+=usr/share/perl/man/man3/re.3.gz
OLD_FILES+=usr/share/perl/man/man3/Sys::Hostname.3.gz
OLD_FILES+=usr/share/perl/man/man3/Sys::Syslog.3.gz
OLD_FILES+=usr/share/perl/man/man3/Term::Cap.3.gz
OLD_FILES+=usr/share/perl/man/man3/Term::Complete.3.gz
OLD_FILES+=usr/share/perl/man/man3/Term::ReadLine.3.gz
OLD_FILES+=usr/share/perl/man/man3/Test.3.gz
OLD_FILES+=usr/share/perl/man/man3/Test::Harness.3.gz
OLD_FILES+=usr/share/perl/man/man3/Text::Abbrev.3.gz
OLD_FILES+=usr/share/perl/man/man3/Text::ParseWords.3.gz
OLD_FILES+=usr/share/perl/man/man3/Text::Soundex.3.gz
OLD_FILES+=usr/share/perl/man/man3/Text::Tabs.3.gz
OLD_FILES+=usr/share/perl/man/man3/Text::Wrap.3.gz
OLD_FILES+=usr/share/perl/man/man3/Thread.3.gz
OLD_FILES+=usr/share/perl/man/man3/Thread::Queue.3.gz
OLD_FILES+=usr/share/perl/man/man3/Thread::Semaphore.3.gz
OLD_FILES+=usr/share/perl/man/man3/Thread::Signal.3.gz
OLD_FILES+=usr/share/perl/man/man3/Thread::Specific.3.gz
OLD_FILES+=usr/share/perl/man/man3/Tie::Array.3.gz
OLD_FILES+=usr/share/perl/man/man3/Tie::Handle.3.gz
OLD_FILES+=usr/share/perl/man/man3/Tie::Hash.3.gz
OLD_FILES+=usr/share/perl/man/man3/Tie::RefHash.3.gz
OLD_FILES+=usr/share/perl/man/man3/Tie::Scalar.3.gz
OLD_FILES+=usr/share/perl/man/man3/Tie::SubstrHash.3.gz
OLD_FILES+=usr/share/perl/man/man3/Time::Local.3.gz
OLD_FILES+=usr/share/perl/man/man3/Time::gmtime.3.gz
OLD_FILES+=usr/share/perl/man/man3/Time::localtime.3.gz
OLD_FILES+=usr/share/perl/man/man3/Time::tm.3.gz
OLD_FILES+=usr/share/perl/man/man3/UNIVERSAL.3.gz
OLD_FILES+=usr/share/perl/man/man3/User::grent.3.gz
OLD_FILES+=usr/share/perl/man/man3/User::pwent.3.gz
OLD_FILES+=usr/share/perl/man/man3/attrs.3.gz
OLD_FILES+=usr/share/perl/man/man3/autouse.3.gz
OLD_FILES+=usr/share/perl/man/man3/base.3.gz
OLD_FILES+=usr/share/perl/man/man3/blib.3.gz
OLD_FILES+=usr/share/perl/man/man3/constant.3.gz
OLD_FILES+=usr/share/perl/man/man3/diagnostics.3.gz
OLD_FILES+=usr/share/perl/man/man3/fields.3.gz
OLD_FILES+=usr/share/perl/man/man3/integer.3.gz
OLD_FILES+=usr/share/perl/man/man3/less.3.gz
OLD_FILES+=usr/share/perl/man/man3/lib.3.gz
OLD_FILES+=usr/share/perl/man/man3/locale.3.gz
OLD_FILES+=usr/share/perl/man/man3/ops.3.gz
OLD_FILES+=usr/share/perl/man/man3/overload.3.gz
OLD_FILES+=usr/share/perl/man/man3/sigtrap.3.gz
OLD_FILES+=usr/share/perl/man/man3/strict.3.gz
OLD_FILES+=usr/share/perl/man/man3/subs.3.gz
OLD_FILES+=usr/share/perl/man/man3/vars.3.gz
OLD_FILES+=usr/share/perl/man/man3/B::Stash.3.gz
OLD_FILES+=usr/share/perl/man/man3/ByteLoader.3.gz
OLD_FILES+=usr/share/perl/man/man3/CGI::Pretty.3.gz
OLD_FILES+=usr/share/perl/man/man3/Carp::Heavy.3.gz
OLD_FILES+=usr/share/perl/man/man3/DB.3.gz
OLD_FILES+=usr/share/perl/man/man3/DProf::DProf.3.gz
OLD_FILES+=usr/share/perl/man/man3/Exporter::Heavy.3.gz
OLD_FILES+=usr/share/perl/man/man3/ExtUtils::MM_Cygwin.3.gz
OLD_FILES+=usr/share/perl/man/man3/File::Glob.3.gz
OLD_FILES+=usr/share/perl/man/man3/Glob::Glob.3.gz
OLD_FILES+=usr/share/perl/man/man3/Hostname::Hostname.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Dir.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Poll.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Socket::INET.3.gz
OLD_FILES+=usr/share/perl/man/man3/IO::Socket::UNIX.3.gz
OLD_FILES+=usr/share/perl/man/man3/Peek::Peek.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Checker.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Find.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::InputObjects.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Man.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::ParseUtils.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Parser.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Plainer.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Select.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Text::Color.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Text::Termcap.3.gz
OLD_FILES+=usr/share/perl/man/man3/Pod::Usage.3.gz
OLD_FILES+=usr/share/perl/man/man3/Syslog::Syslog.3.gz
OLD_FILES+=usr/share/perl/man/man3/Term::ANSIColor.3.gz
OLD_FILES+=usr/share/perl/man/man3/XSLoader.3.gz
OLD_FILES+=usr/share/perl/man/man3/attributes.3.gz
OLD_FILES+=usr/share/perl/man/man3/bytes.3.gz
OLD_FILES+=usr/share/perl/man/man3/charnames.3.gz
OLD_FILES+=usr/share/perl/man/man3/filetest.3.gz
OLD_FILES+=usr/share/perl/man/man3/open.3.gz
OLD_FILES+=usr/share/perl/man/man3/utf8.3.gz
OLD_FILES+=usr/share/perl/man/man3/warnings.3.gz
OLD_FILES+=usr/share/perl/man/man3/warnings::register.3.gz
OLD_FILES+=usr/share/perl/man/whatis
OLD_FILES+=usr/share/man/man1/CA.pl.1.gz
OLD_FILES+=usr/share/man/man1/asn1parse.1.gz
OLD_FILES+=usr/share/man/man1/ca.1.gz
OLD_FILES+=usr/share/man/man1/ciphers.1.gz
OLD_FILES+=usr/share/man/man1/config.1.gz
OLD_FILES+=usr/share/man/man1/crl.1.gz
OLD_FILES+=usr/share/man/man1/crl2pkcs7.1.gz
OLD_FILES+=usr/share/man/man1/dgst.1.gz
OLD_FILES+=usr/share/man/man1/dhparam.1.gz
OLD_FILES+=usr/share/man/man1/doscmd.1.gz
OLD_FILES+=usr/share/man/man1/dsa.1.gz
OLD_FILES+=usr/share/man/man1/dsaparam.1.gz
OLD_FILES+=usr/share/man/man1/enc.1.gz
OLD_FILES+=usr/share/man/man1/gendsa.1.gz
OLD_FILES+=usr/share/man/man1/genrsa.1.gz
OLD_FILES+=usr/share/man/man1/getNAME.1.gz
OLD_FILES+=usr/share/man/man1/nseq.1.gz
OLD_FILES+=usr/share/man/man1/ocsp.1.gz
OLD_FILES+=usr/share/man/man1/openssl.1.gz
OLD_FILES+=usr/share/man/man1/perl.1.gz
OLD_FILES+=usr/share/man/man1/perl5004delta.1.gz
OLD_FILES+=usr/share/man/man1/perlapio.1.gz
OLD_FILES+=usr/share/man/man1/perlbook.1.gz
OLD_FILES+=usr/share/man/man1/perlbot.1.gz
OLD_FILES+=usr/share/man/man1/perlcall.1.gz
OLD_FILES+=usr/share/man/man1/perldata.1.gz
OLD_FILES+=usr/share/man/man1/perldebug.1.gz
OLD_FILES+=usr/share/man/man1/perldelta.1.gz
OLD_FILES+=usr/share/man/man1/perldiag.1.gz
OLD_FILES+=usr/share/man/man1/perldsc.1.gz
OLD_FILES+=usr/share/man/man1/perlembed.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq1.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq2.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq3.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq4.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq5.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq6.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq7.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq8.1.gz
OLD_FILES+=usr/share/man/man1/perlfaq9.1.gz
OLD_FILES+=usr/share/man/man1/perlform.1.gz
OLD_FILES+=usr/share/man/man1/perlfunc.1.gz
OLD_FILES+=usr/share/man/man1/perlguts.1.gz
OLD_FILES+=usr/share/man/man1/perlhist.1.gz
OLD_FILES+=usr/share/man/man1/perlipc.1.gz
OLD_FILES+=usr/share/man/man1/perllocale.1.gz
OLD_FILES+=usr/share/man/man1/perllol.1.gz
OLD_FILES+=usr/share/man/man1/perlmod.1.gz
OLD_FILES+=usr/share/man/man1/perlmodinstall.1.gz
OLD_FILES+=usr/share/man/man1/perlmodlib.1.gz
OLD_FILES+=usr/share/man/man1/perlobj.1.gz
OLD_FILES+=usr/share/man/man1/perlop.1.gz
OLD_FILES+=usr/share/man/man1/perlopentut.1.gz
OLD_FILES+=usr/share/man/man1/perlpod.1.gz
OLD_FILES+=usr/share/man/man1/perlport.1.gz
OLD_FILES+=usr/share/man/man1/perlre.1.gz
OLD_FILES+=usr/share/man/man1/perlref.1.gz
OLD_FILES+=usr/share/man/man1/perlreftut.1.gz
OLD_FILES+=usr/share/man/man1/perlrun.1.gz
OLD_FILES+=usr/share/man/man1/perlsec.1.gz
OLD_FILES+=usr/share/man/man1/perlstyle.1.gz
OLD_FILES+=usr/share/man/man1/perlsub.1.gz
OLD_FILES+=usr/share/man/man1/perlsyn.1.gz
OLD_FILES+=usr/share/man/man1/perlthrtut.1.gz
OLD_FILES+=usr/share/man/man1/perltie.1.gz
OLD_FILES+=usr/share/man/man1/perltoc.1.gz
OLD_FILES+=usr/share/man/man1/perltoot.1.gz
OLD_FILES+=usr/share/man/man1/perltrap.1.gz
OLD_FILES+=usr/share/man/man1/perlvar.1.gz
OLD_FILES+=usr/share/man/man1/perlxs.1.gz
OLD_FILES+=usr/share/man/man1/perlxstut.1.gz
OLD_FILES+=usr/share/man/man1/perlbug.1.gz
OLD_FILES+=usr/share/man/man1/perlcc.1.gz
OLD_FILES+=usr/share/man/man1/perldoc.1.gz
OLD_FILES+=usr/share/man/man1/perl5005delta.1.gz
OLD_FILES+=usr/share/man/man1/perlfork.1.gz
OLD_FILES+=usr/share/man/man1/perlboot.1.gz
OLD_FILES+=usr/share/man/man1/perltootc.1.gz
OLD_FILES+=usr/share/man/man1/perldbmfilter.1.gz
OLD_FILES+=usr/share/man/man1/perldebguts.1.gz
OLD_FILES+=usr/share/man/man1/perlnumber.1.gz
OLD_FILES+=usr/share/man/man1/perlcompile.1.gz
OLD_FILES+=usr/share/man/man1/perltodo.1.gz
OLD_FILES+=usr/share/man/man1/perlapi.1.gz
OLD_FILES+=usr/share/man/man1/perlintern.1.gz
OLD_FILES+=usr/share/man/man1/perlhack.1.gz
OLD_FILES+=usr/share/man/man1/perlbc.1.gz
OLD_FILES+=usr/share/man/man1/pkcs12.1.gz
OLD_FILES+=usr/share/man/man1/pkcs7.1.gz
OLD_FILES+=usr/share/man/man1/pkcs8.1.gz
OLD_FILES+=usr/share/man/man1/rand.1.gz
OLD_FILES+=usr/share/man/man1/req.1.gz
OLD_FILES+=usr/share/man/man1/rsa.1.gz
OLD_FILES+=usr/share/man/man1/rsautl.1.gz
OLD_FILES+=usr/share/man/man1/s_client.1.gz
OLD_FILES+=usr/share/man/man1/s_server.1.gz
OLD_FILES+=usr/share/man/man1/sess_id.1.gz
OLD_FILES+=usr/share/man/man1/smime.1.gz
OLD_FILES+=usr/share/man/man1/speed.1.gz
OLD_FILES+=usr/share/man/man1/spkac.1.gz
OLD_FILES+=usr/share/man/man1/verify.1.gz
OLD_FILES+=usr/share/man/man1/version.1.gz
OLD_FILES+=usr/share/man/man1/x509.1.gz
OLD_FILES+=usr/share/man/man3/SSL_COMP_add_compression_method.3.gz
OLD_FILES+=usr/share/man/man3/SSL_CTX_get_ex_new_index.3.gz
OLD_FILES+=usr/share/man/man3/archive_entry_dup.3.gz
OLD_FILES+=usr/share/man/man3/archive_entry_set_tartype.3.gz
OLD_FILES+=usr/share/man/man3/archive_entry_tartype.3.gz
OLD_FILES+=usr/share/man/man3/archive_read_data_into_file.3.gz
OLD_FILES+=usr/share/man/man3/archive_read_open_tar.3.gz
OLD_FILES+=usr/share/man/man3/archive_read_support_format_gnutar.3.gz
OLD_FILES+=usr/share/man/man3/cipher.3.gz
OLD_FILES+=usr/share/man/man3/des_cipher.3.gz
OLD_FILES+=usr/share/man/man3/des_setkey.3.gz
OLD_FILES+=usr/share/man/man3/encrypt.3.gz
OLD_FILES+=usr/share/man/man3/endvfsent.3.gz
OLD_FILES+=usr/share/man/man3/getvfsbytype.3.gz
OLD_FILES+=usr/share/man/man3/getvfsent.3.gz
OLD_FILES+=usr/share/man/man3/isnanf.3.gz
OLD_FILES+=usr/share/man/man3/libautofs.3.gz
OLD_FILES+=usr/share/man/man3/pthread_attr_setsstack.3.gz
OLD_FILES+=usr/share/man/man3/pthread_getcancelstate.3.gz
OLD_FILES+=usr/share/man/man3/pthread_mutexattr_getpshared.3.gz
OLD_FILES+=usr/share/man/man3/pthread_mutexattr_setpshared.3.gz
OLD_FILES+=usr/share/man/man3/set_assertion_failure_callback.3.gz
OLD_FILES+=usr/share/man/man3/setkey.3.gz
OLD_FILES+=usr/share/man/man3/setvfsent.3.gz
OLD_FILES+=usr/share/man/man3/ssl.3.gz
OLD_FILES+=usr/share/man/man3/vfsisloadable.3.gz
OLD_FILES+=usr/share/man/man3/vfsload.3.gz
OLD_FILES+=usr/share/man/man4/als4000.4.gz
OLD_FILES+=usr/share/man/man4/csa.4.gz
OLD_FILES+=usr/share/man/man4/emu10k1.4.gz
OLD_FILES+=usr/share/man/man4/euc.4.gz
OLD_FILES+=usr/share/man/man4/gusc.4.gz
OLD_FILES+=usr/share/man/man4/if_fwp.4.gz
OLD_FILES+=usr/share/man/man4/lomac.4.gz
OLD_FILES+=usr/share/man/man4/maestro3.4.gz
OLD_FILES+=usr/share/man/man4/raid.4.gz
OLD_FILES+=usr/share/man/man4/sbc.4.gz
OLD_FILES+=usr/share/man/man4/sd.4.gz
OLD_FILES+=usr/share/man/man4/snc.4.gz
OLD_FILES+=usr/share/man/man4/st.4.gz
OLD_FILES+=usr/share/man/man4/uaudio.4.gz
OLD_FILES+=usr/share/man/man4/utf2.4.gz
OLD_FILES+=usr/share/man/man4/vinumdebug.4.gz
OLD_FILES+=usr/share/man/man5/disklabel.5.gz
OLD_FILES+=usr/share/man/man5/dm.conf.5.gz
OLD_FILES+=usr/share/man/man5/ranlib.5.gz
OLD_FILES+=usr/share/man/man5/utf2.5.gz
OLD_FILES+=usr/share/man/man7/groff_mwww.7.gz
OLD_FILES+=usr/share/man/man7/mmroff.7.gz
OLD_FILES+=usr/share/man/man7/mwww.7.gz
OLD_FILES+=usr/share/man/man7/style.perl.7.gz
OLD_FILES+=usr/share/man/man8/apm.8.gz
OLD_FILES+=usr/share/man/man8/apmconf.8.gz
OLD_FILES+=usr/share/man/man8/apmd.8.gz
OLD_FILES+=usr/share/man/man8/dm.8.gz
OLD_FILES+=usr/share/man/man8/pam_ftp.8.gz
OLD_FILES+=usr/share/man/man8/pam_wheel.8.gz
OLD_FILES+=usr/share/man/man8/sconfig.8.gz
OLD_FILES+=usr/share/man/man8/ssl.8.gz
OLD_FILES+=usr/share/man/man8/wlconfig.8.gz
OLD_FILES+=usr/share/man/man9/CURSIG.9.gz
OLD_FILES+=usr/share/man/man9/VFS_INIT.9.gz
OLD_FILES+=usr/share/man/man9/at_exit.9.gz
OLD_FILES+=usr/share/man/man9/at_fork.9.gz
OLD_FILES+=usr/share/man/man9/cdevsw_add.9.gz
OLD_FILES+=usr/share/man/man9/cdevsw_remove.9.gz
OLD_FILES+=usr/share/man/man9/cv_waitq_empty.9.gz
OLD_FILES+=usr/share/man/man9/cv_waitq_remove.9.gz
OLD_FILES+=usr/share/man/man9/endtsleep.9.gz
OLD_FILES+=usr/share/man/man9/jumbo.9.gz
OLD_FILES+=usr/share/man/man9/jumbo_freem.9.gz
OLD_FILES+=usr/share/man/man9/jumbo_pg_alloc.9.gz
OLD_FILES+=usr/share/man/man9/jumbo_pg_free.9.gz
OLD_FILES+=usr/share/man/man9/jumbo_pg_steal.9.gz
OLD_FILES+=usr/share/man/man9/jumbo_phys_to_kva.9.gz
OLD_FILES+=usr/share/man/man9/jumbo_vm_init.9.gz
OLD_FILES+=usr/share/man/man9/mac_biba.9.gz
OLD_FILES+=usr/share/man/man9/mac_bsdextended.9.gz
OLD_FILES+=usr/share/man/man9/mono_time.9.gz
OLD_FILES+=usr/share/man/man9/p1003_1b.9.gz
OLD_FILES+=usr/share/man/man9/pmap_prefault.9.gz
OLD_FILES+=usr/share/man/man9/posix4.9.gz
OLD_FILES+=usr/share/man/man9/resource_query_name.9.gz
OLD_FILES+=usr/share/man/man9/resource_query_string.9.gz
OLD_FILES+=usr/share/man/man9/resource_query_unit.9.gz
OLD_FILES+=usr/share/man/man9/rm_at_exit.9.gz
OLD_FILES+=usr/share/man/man9/rm_at_fork.9.gz
OLD_FILES+=usr/share/man/man9/runtime.9.gz
OLD_FILES+=usr/share/man/man9/sleepinit.9.gz
OLD_FILES+=usr/share/man/man9/unsleep.9.gz
OLD_FILES+=usr/share/man/ja/man1/perl.1.gz
OLD_FILES+=usr/share/games/atc/Game_List
OLD_FILES+=usr/share/games/atc/Killer
OLD_FILES+=usr/share/games/atc/crossover
OLD_FILES+=usr/share/games/atc/default
OLD_FILES+=usr/share/games/atc/easy
OLD_FILES+=usr/share/games/atc/game_2
OLD_FILES+=usr/share/games/larn/larnmaze
OLD_FILES+=usr/share/games/larn/larnopts
OLD_FILES+=usr/share/games/larn/larn.help
OLD_FILES+=usr/share/games/quiz.db/africa
OLD_FILES+=usr/share/games/quiz.db/america
OLD_FILES+=usr/share/games/quiz.db/areas
OLD_FILES+=usr/share/games/quiz.db/arith
OLD_FILES+=usr/share/games/quiz.db/asia
OLD_FILES+=usr/share/games/quiz.db/babies
OLD_FILES+=usr/share/games/quiz.db/bard
OLD_FILES+=usr/share/games/quiz.db/chinese
OLD_FILES+=usr/share/games/quiz.db/collectives
OLD_FILES+=usr/share/games/quiz.db/ed
OLD_FILES+=usr/share/games/quiz.db/elements
OLD_FILES+=usr/share/games/quiz.db/europe
OLD_FILES+=usr/share/games/quiz.db/flowers
OLD_FILES+=usr/share/games/quiz.db/greek
OLD_FILES+=usr/share/games/quiz.db/inca
OLD_FILES+=usr/share/games/quiz.db/index
OLD_FILES+=usr/share/games/quiz.db/latin
OLD_FILES+=usr/share/games/quiz.db/locomotive
OLD_FILES+=usr/share/games/quiz.db/midearth
OLD_FILES+=usr/share/games/quiz.db/morse
OLD_FILES+=usr/share/games/quiz.db/murders
OLD_FILES+=usr/share/games/quiz.db/poetry
OLD_FILES+=usr/share/games/quiz.db/posneg
OLD_FILES+=usr/share/games/quiz.db/pres
OLD_FILES+=usr/share/games/quiz.db/province
OLD_FILES+=usr/share/games/quiz.db/seq-easy
OLD_FILES+=usr/share/games/quiz.db/seq-hard
OLD_FILES+=usr/share/games/quiz.db/sexes
OLD_FILES+=usr/share/games/quiz.db/sov
OLD_FILES+=usr/share/games/quiz.db/spell
OLD_FILES+=usr/share/games/quiz.db/state
OLD_FILES+=usr/share/games/quiz.db/trek
OLD_FILES+=usr/share/games/quiz.db/ucc
OLD_FILES+=usr/share/games/cribbage.instr
OLD_FILES+=usr/share/games/fish.instr
OLD_FILES+=usr/share/games/wump.info
OLD_FILES+=usr/games/hide/adventure
OLD_FILES+=usr/games/hide/arithmetic
OLD_FILES+=usr/games/hide/atc
OLD_FILES+=usr/games/hide/backgammon
OLD_FILES+=usr/games/hide/teachgammon
OLD_FILES+=usr/games/hide/battlestar
OLD_FILES+=usr/games/hide/bs
OLD_FILES+=usr/games/hide/canfield
OLD_FILES+=usr/games/hide/cribbage
OLD_FILES+=usr/games/hide/fish
OLD_FILES+=usr/games/hide/hack
OLD_FILES+=usr/games/hide/hangman
OLD_FILES+=usr/games/hide/larn
OLD_FILES+=usr/games/hide/mille
OLD_FILES+=usr/games/hide/phantasia
OLD_FILES+=usr/games/hide/quiz
OLD_FILES+=usr/games/hide/robots
OLD_FILES+=usr/games/hide/rogue
OLD_FILES+=usr/games/hide/sail
OLD_FILES+=usr/games/hide/snake
OLD_FILES+=usr/games/hide/trek
OLD_FILES+=usr/games/hide/worm
OLD_FILES+=usr/games/hide/wump
OLD_FILES+=usr/games/adventure
OLD_FILES+=usr/games/arithmetic
OLD_FILES+=usr/games/atc
OLD_FILES+=usr/games/backgammon
OLD_FILES+=usr/games/teachgammon
OLD_FILES+=usr/games/battlestar
OLD_FILES+=usr/games/bs
OLD_FILES+=usr/games/canfield
OLD_FILES+=usr/games/cfscores
OLD_FILES+=usr/games/cribbage
OLD_FILES+=usr/games/dm
OLD_FILES+=usr/games/fish
OLD_FILES+=usr/games/hack
OLD_FILES+=usr/games/hangman
OLD_FILES+=usr/games/larn
OLD_FILES+=usr/games/mille
OLD_FILES+=usr/games/phantasia
OLD_FILES+=usr/games/piano
OLD_FILES+=usr/games/pig
OLD_FILES+=usr/games/quiz
OLD_FILES+=usr/games/rain
OLD_FILES+=usr/games/robots
OLD_FILES+=usr/games/rogue
OLD_FILES+=usr/games/sail
OLD_FILES+=usr/games/snake
OLD_FILES+=usr/games/snscore
OLD_FILES+=usr/games/trek
OLD_FILES+=usr/games/wargames
OLD_FILES+=usr/games/worm
OLD_FILES+=usr/games/worms
OLD_FILES+=usr/games/wump
OLD_FILES+=sbin/mount_reiserfs
OLD_FILES+=usr/include/cam/cam_extend.h
OLD_FILES+=usr/include/dev/wi/wi_hostap.h
OLD_FILES+=usr/include/disktab.h
OLD_FILES+=usr/include/g++/FlexLexer.h
OLD_FILES+=usr/include/g++/PlotFile.h
OLD_FILES+=usr/include/g++/SFile.h
OLD_FILES+=usr/include/g++/_G_config.h
OLD_FILES+=usr/include/g++/algo.h
OLD_FILES+=usr/include/g++/algobase.h
OLD_FILES+=usr/include/g++/algorithm
OLD_FILES+=usr/include/g++/alloc.h
OLD_FILES+=usr/include/g++/bitset
OLD_FILES+=usr/include/g++/builtinbuf.h
OLD_FILES+=usr/include/g++/bvector.h
OLD_FILES+=usr/include/g++/cassert
OLD_FILES+=usr/include/g++/cctype
OLD_FILES+=usr/include/g++/cerrno
OLD_FILES+=usr/include/g++/cfloat
OLD_FILES+=usr/include/g++/ciso646
OLD_FILES+=usr/include/g++/climits
OLD_FILES+=usr/include/g++/clocale
OLD_FILES+=usr/include/g++/cmath
OLD_FILES+=usr/include/g++/complex
OLD_FILES+=usr/include/g++/complex.h
OLD_FILES+=usr/include/g++/csetjmp
OLD_FILES+=usr/include/g++/csignal
OLD_FILES+=usr/include/g++/cstdarg
OLD_FILES+=usr/include/g++/cstddef
OLD_FILES+=usr/include/g++/cstdio
OLD_FILES+=usr/include/g++/cstdlib
OLD_FILES+=usr/include/g++/cstring
OLD_FILES+=usr/include/g++/ctime
OLD_FILES+=usr/include/g++/cwchar
OLD_FILES+=usr/include/g++/cwctype
OLD_FILES+=usr/include/g++/defalloc.h
OLD_FILES+=usr/include/g++/deque
OLD_FILES+=usr/include/g++/deque.h
OLD_FILES+=usr/include/g++/editbuf.h
OLD_FILES+=usr/include/g++/exception
OLD_FILES+=usr/include/g++/floatio.h
OLD_FILES+=usr/include/g++/fstream
OLD_FILES+=usr/include/g++/fstream.h
OLD_FILES+=usr/include/g++/function.h
OLD_FILES+=usr/include/g++/functional
OLD_FILES+=usr/include/g++/hash_map
OLD_FILES+=usr/include/g++/hash_map.h
OLD_FILES+=usr/include/g++/hash_set
OLD_FILES+=usr/include/g++/hash_set.h
OLD_FILES+=usr/include/g++/hashtable.h
OLD_FILES+=usr/include/g++/heap.h
OLD_FILES+=usr/include/g++/indstream.h
OLD_FILES+=usr/include/g++/iolibio.h
OLD_FILES+=usr/include/g++/iomanip
OLD_FILES+=usr/include/g++/iomanip.h
OLD_FILES+=usr/include/g++/iosfwd
OLD_FILES+=usr/include/g++/iostdio.h
OLD_FILES+=usr/include/g++/iostream
OLD_FILES+=usr/include/g++/iostream.h
OLD_FILES+=usr/include/g++/iostreamP.h
OLD_FILES+=usr/include/g++/istream.h
OLD_FILES+=usr/include/g++/iterator
OLD_FILES+=usr/include/g++/iterator.h
OLD_FILES+=usr/include/g++/libio.h
OLD_FILES+=usr/include/g++/libioP.h
OLD_FILES+=usr/include/g++/list
OLD_FILES+=usr/include/g++/list.h
OLD_FILES+=usr/include/g++/map
OLD_FILES+=usr/include/g++/map.h
OLD_FILES+=usr/include/g++/memory
OLD_FILES+=usr/include/g++/multimap.h
OLD_FILES+=usr/include/g++/multiset.h
OLD_FILES+=usr/include/g++/new
OLD_FILES+=usr/include/g++/new.h
OLD_FILES+=usr/include/g++/numeric
OLD_FILES+=usr/include/g++/ostream.h
OLD_FILES+=usr/include/g++/pair.h
OLD_FILES+=usr/include/g++/parsestream.h
OLD_FILES+=usr/include/g++/pfstream.h
OLD_FILES+=usr/include/g++/procbuf.h
OLD_FILES+=usr/include/g++/pthread_alloc
OLD_FILES+=usr/include/g++/pthread_alloc.h
OLD_FILES+=usr/include/g++/queue
OLD_FILES+=usr/include/g++/rope
OLD_FILES+=usr/include/g++/rope.h
OLD_FILES+=usr/include/g++/ropeimpl.h
OLD_FILES+=usr/include/g++/set
OLD_FILES+=usr/include/g++/set.h
OLD_FILES+=usr/include/g++/slist
OLD_FILES+=usr/include/g++/slist.h
OLD_FILES+=usr/include/g++/sstream
OLD_FILES+=usr/include/g++/stack
OLD_FILES+=usr/include/g++/stack.h
OLD_FILES+=usr/include/g++/std/bastring.cc
OLD_FILES+=usr/include/g++/std/bastring.h
OLD_FILES+=usr/include/g++/std/complext.cc
OLD_FILES+=usr/include/g++/std/complext.h
OLD_FILES+=usr/include/g++/std/dcomplex.h
OLD_FILES+=usr/include/g++/std/fcomplex.h
OLD_FILES+=usr/include/g++/std/gslice.h
OLD_FILES+=usr/include/g++/std/gslice_array.h
OLD_FILES+=usr/include/g++/std/indirect_array.h
OLD_FILES+=usr/include/g++/std/ldcomplex.h
OLD_FILES+=usr/include/g++/std/mask_array.h
OLD_FILES+=usr/include/g++/std/slice.h
OLD_FILES+=usr/include/g++/std/slice_array.h
OLD_FILES+=usr/include/g++/std/std_valarray.h
OLD_FILES+=usr/include/g++/std/straits.h
OLD_FILES+=usr/include/g++/std/valarray_array.h
OLD_FILES+=usr/include/g++/std/valarray_array.tcc
OLD_FILES+=usr/include/g++/std/valarray_meta.h
OLD_FILES+=usr/include/g++/stdexcept
OLD_FILES+=usr/include/g++/stdiostream.h
OLD_FILES+=usr/include/g++/stl.h
OLD_FILES+=usr/include/g++/stl_algo.h
OLD_FILES+=usr/include/g++/stl_algobase.h
OLD_FILES+=usr/include/g++/stl_alloc.h
OLD_FILES+=usr/include/g++/stl_bvector.h
OLD_FILES+=usr/include/g++/stl_config.h
OLD_FILES+=usr/include/g++/stl_construct.h
OLD_FILES+=usr/include/g++/stl_deque.h
OLD_FILES+=usr/include/g++/stl_function.h
OLD_FILES+=usr/include/g++/stl_hash_fun.h
OLD_FILES+=usr/include/g++/stl_hash_map.h
OLD_FILES+=usr/include/g++/stl_hash_set.h
OLD_FILES+=usr/include/g++/stl_hashtable.h
OLD_FILES+=usr/include/g++/stl_heap.h
OLD_FILES+=usr/include/g++/stl_iterator.h
OLD_FILES+=usr/include/g++/stl_list.h
OLD_FILES+=usr/include/g++/stl_map.h
OLD_FILES+=usr/include/g++/stl_multimap.h
OLD_FILES+=usr/include/g++/stl_multiset.h
OLD_FILES+=usr/include/g++/stl_numeric.h
OLD_FILES+=usr/include/g++/stl_pair.h
OLD_FILES+=usr/include/g++/stl_queue.h
OLD_FILES+=usr/include/g++/stl_raw_storage_iter.h
OLD_FILES+=usr/include/g++/stl_relops.h
OLD_FILES+=usr/include/g++/stl_rope.h
OLD_FILES+=usr/include/g++/stl_set.h
OLD_FILES+=usr/include/g++/stl_slist.h
OLD_FILES+=usr/include/g++/stl_stack.h
OLD_FILES+=usr/include/g++/stl_tempbuf.h
OLD_FILES+=usr/include/g++/stl_tree.h
OLD_FILES+=usr/include/g++/stl_uninitialized.h
OLD_FILES+=usr/include/g++/stl_vector.h
OLD_FILES+=usr/include/g++/stream.h
OLD_FILES+=usr/include/g++/streambuf.h
OLD_FILES+=usr/include/g++/strfile.h
OLD_FILES+=usr/include/g++/string
OLD_FILES+=usr/include/g++/strstream
OLD_FILES+=usr/include/g++/strstream.h
OLD_FILES+=usr/include/g++/tempbuf.h
OLD_FILES+=usr/include/g++/tree.h
OLD_FILES+=usr/include/g++/type_traits.h
OLD_FILES+=usr/include/g++/typeinfo
OLD_FILES+=usr/include/g++/utility
OLD_FILES+=usr/include/g++/valarray
OLD_FILES+=usr/include/g++/vector
OLD_FILES+=usr/include/g++/vector.h
OLD_FILES+=usr/include/gmp.h
OLD_FILES+=usr/include/isc/assertions.h
OLD_FILES+=usr/include/isc/ctl.h
OLD_FILES+=usr/include/isc/dst.h
OLD_FILES+=usr/include/isc/eventlib.h
OLD_FILES+=usr/include/isc/heap.h
OLD_FILES+=usr/include/isc/irpmarshall.h
OLD_FILES+=usr/include/isc/list.h
OLD_FILES+=usr/include/isc/logging.h
OLD_FILES+=usr/include/isc/memcluster.h
OLD_FILES+=usr/include/isc/misc.h
OLD_FILES+=usr/include/isc/tree.h
OLD_FILES+=usr/include/machine/ansi.h
OLD_FILES+=usr/include/machine/apic.h
OLD_FILES+=usr/include/machine/asc_ioctl.h
OLD_FILES+=usr/include/machine/asnames.h
OLD_FILES+=usr/include/machine/bus_at386.h
OLD_FILES+=usr/include/machine/bus_memio.h
OLD_FILES+=usr/include/machine/bus_pc98.h
OLD_FILES+=usr/include/machine/bus_pio.h
OLD_FILES+=usr/include/machine/cdk.h
OLD_FILES+=usr/include/machine/comstats.h
OLD_FILES+=usr/include/machine/console.h
OLD_FILES+=usr/include/machine/critical.h
OLD_FILES+=usr/include/machine/cronyx.h
OLD_FILES+=usr/include/machine/dvcfg.h
OLD_FILES+=usr/include/machine/globaldata.h
OLD_FILES+=usr/include/machine/globals.h
OLD_FILES+=usr/include/machine/gsc.h
OLD_FILES+=usr/include/machine/i4b_isppp.h
OLD_FILES+=usr/include/machine/if_wavelan_ieee.h
OLD_FILES+=usr/include/machine/iic.h
OLD_FILES+=usr/include/machine/ioctl_ctx.h
OLD_FILES+=usr/include/machine/ioctl_fd.h
OLD_FILES+=usr/include/machine/ipl.h
OLD_FILES+=usr/include/machine/lock.h
OLD_FILES+=usr/include/machine/mouse.h
OLD_FILES+=usr/include/machine/mpapic.h
OLD_FILES+=usr/include/machine/mtpr.h
OLD_FILES+=usr/include/machine/pc/msdos.h
OLD_FILES+=usr/include/machine/physio_proc.h
OLD_FILES+=usr/include/machine/smb.h
OLD_FILES+=usr/include/machine/spigot.h
OLD_FILES+=usr/include/machine/types.h
OLD_FILES+=usr/include/machine/uc_device.h
OLD_FILES+=usr/include/machine/ultrasound.h
OLD_FILES+=usr/include/machine/wtio.h
OLD_FILES+=usr/include/msdosfs/bootsect.h
OLD_FILES+=usr/include/msdosfs/bpb.h
OLD_FILES+=usr/include/msdosfs/denode.h
OLD_FILES+=usr/include/msdosfs/direntry.h
OLD_FILES+=usr/include/msdosfs/fat.h
OLD_FILES+=usr/include/msdosfs/msdosfsmount.h
OLD_FILES+=usr/include/net/hostcache.h
OLD_FILES+=usr/include/net/if_faith.h
OLD_FILES+=usr/include/net/if_ieee80211.h
OLD_FILES+=usr/include/net/if_tunvar.h
OLD_FILES+=usr/include/net/intrq.h
OLD_FILES+=usr/include/netatm/kern_include.h
OLD_FILES+=usr/include/netinet/if_fddi.h
OLD_FILES+=usr/include/netinet/in_hostcache.h
OLD_FILES+=usr/include/netinet/ip_flow.h
OLD_FILES+=usr/include/netinet/ip_fw2.h
OLD_FILES+=usr/include/netinet6/in6_prefix.h
OLD_FILES+=usr/include/netns/idp.h
OLD_FILES+=usr/include/netns/idp_var.h
OLD_FILES+=usr/include/netns/ns.h
OLD_FILES+=usr/include/netns/ns_error.h
OLD_FILES+=usr/include/netns/ns_if.h
OLD_FILES+=usr/include/netns/ns_pcb.h
OLD_FILES+=usr/include/netns/sp.h
OLD_FILES+=usr/include/netns/spidp.h
OLD_FILES+=usr/include/netns/spp_debug.h
OLD_FILES+=usr/include/netns/spp_timer.h
OLD_FILES+=usr/include/netns/spp_var.h
OLD_FILES+=usr/include/nfs/nfs.h
OLD_FILES+=usr/include/nfs/nfsm_subs.h
OLD_FILES+=usr/include/nfs/nfsmount.h
OLD_FILES+=usr/include/nfs/nfsnode.h
OLD_FILES+=usr/include/nfs/nfsrtt.h
OLD_FILES+=usr/include/nfs/nfsrvcache.h
OLD_FILES+=usr/include/nfs/nfsv2.h
OLD_FILES+=usr/include/nfs/nqnfs.h
OLD_FILES+=usr/include/ntfs/ntfs.h
OLD_FILES+=usr/include/ntfs/ntfs_compr.h
OLD_FILES+=usr/include/ntfs/ntfs_ihash.h
OLD_FILES+=usr/include/ntfs/ntfs_inode.h
OLD_FILES+=usr/include/ntfs/ntfs_subr.h
OLD_FILES+=usr/include/ntfs/ntfs_vfsops.h
OLD_FILES+=usr/include/ntfs/ntfsmount.h
OLD_FILES+=usr/include/nwfs/nwfs.h
OLD_FILES+=usr/include/nwfs/nwfs_mount.h
OLD_FILES+=usr/include/nwfs/nwfs_node.h
OLD_FILES+=usr/include/nwfs/nwfs_subr.h
OLD_FILES+=usr/include/posix4/_semaphore.h
OLD_FILES+=usr/include/posix4/aio.h
OLD_FILES+=usr/include/posix4/ksem.h
OLD_FILES+=usr/include/posix4/mqueue.h
OLD_FILES+=usr/include/posix4/posix4.h
OLD_FILES+=usr/include/posix4/sched.h
OLD_FILES+=usr/include/posix4/semaphore.h
OLD_DIRS+=usr/include/posix4
OLD_FILES+=usr/include/security/_pam_compat.h
OLD_FILES+=usr/include/security/_pam_macros.h
OLD_FILES+=usr/include/security/_pam_types.h
OLD_FILES+=usr/include/security/pam_malloc.h
OLD_FILES+=usr/include/security/pam_misc.h
OLD_FILES+=usr/include/skey.h
OLD_FILES+=usr/include/strhash.h
OLD_FILES+=usr/include/struct.h
OLD_FILES+=usr/include/sys/_label.h
OLD_FILES+=usr/include/sys/_posix.h
OLD_FILES+=usr/include/sys/bus_private.h
OLD_FILES+=usr/include/sys/ccdvar.h
OLD_FILES+=usr/include/sys/diskslice.h
OLD_FILES+=usr/include/sys/dmap.h
OLD_FILES+=usr/include/sys/inttypes.h
OLD_FILES+=usr/include/sys/jumbo.h
OLD_FILES+=usr/include/sys/mac_policy.h
OLD_FILES+=usr/include/sys/pbioio.h
OLD_FILES+=usr/include/sys/syscall-hide.h
OLD_FILES+=usr/include/sys/tprintf.h
OLD_FILES+=usr/include/sys/vnioctl.h
OLD_FILES+=usr/include/sys/wormio.h
OLD_FILES+=usr/include/telnet.h
OLD_FILES+=usr/include/ufs/mfs/mfs_extern.h
OLD_FILES+=usr/include/ufs/mfs/mfsnode.h
OLD_FILES+=usr/include/values.h
OLD_FILES+=usr/include/vm/vm_zone.h
OLD_FILES+=usr/share/examples/etc/usbd.conf
OLD_FILES+=usr/share/examples/meteor/README
OLD_FILES+=usr/share/examples/meteor/rgb16.c
OLD_FILES+=usr/share/examples/meteor/rgb24.c
OLD_FILES+=usr/share/examples/meteor/test-n.c
OLD_FILES+=usr/share/examples/meteor/yuvpk.c
OLD_FILES+=usr/share/examples/meteor/yuvpl.c
OLD_FILES+=usr/share/examples/worm/README
OLD_FILES+=usr/share/examples/worm/makecdfs.sh
OLD_FILES+=usr/share/groff_font/devlj4/Makefile
OLD_FILES+=usr/share/groff_font/devlj4/text.map
OLD_FILES+=usr/share/groff_font/devlj4/special.map
OLD_FILES+=usr/share/misc/nslookup.help
OLD_FILES+=usr/share/sendmail/cf/feature/nodns.m4
OLD_FILES+=usr/share/syscons/keymaps/lat-amer.kbd
OLD_FILES+=usr/share/vi/catalog/ru_SU.KOI8-R
OLD_FILES+=usr/share/zoneinfo/Africa/Timbuktu
OLD_FILES+=usr/share/zoneinfo/Africa/Asmera
OLD_FILES+=usr/share/zoneinfo/America/Buenos_Aires
OLD_FILES+=usr/share/zoneinfo/America/Cordoba
OLD_FILES+=usr/share/zoneinfo/America/Jujuy
OLD_FILES+=usr/share/zoneinfo/America/Catamarca
OLD_FILES+=usr/share/zoneinfo/America/Mendoza
OLD_FILES+=usr/share/zoneinfo/America/Indianapolis
OLD_FILES+=usr/share/zoneinfo/America/Louisville
OLD_FILES+=usr/share/zoneinfo/America/Argentina/ComodRivadavia
OLD_FILES+=usr/share/zoneinfo/Atlantic/Faeroe
OLD_FILES+=usr/share/zoneinfo/Europe/Belfast
OLD_FILES+=usr/share/zoneinfo/Pacific/Yap
OLD_FILES+=usr/share/zoneinfo/SystemV/YST9
OLD_FILES+=usr/share/zoneinfo/SystemV/PST8
OLD_FILES+=usr/share/zoneinfo/SystemV/EST5EDT
OLD_FILES+=usr/share/zoneinfo/SystemV/CST6CDT
OLD_FILES+=usr/share/zoneinfo/SystemV/MST7MDT
OLD_FILES+=usr/share/zoneinfo/SystemV/PST8PDT
OLD_FILES+=usr/share/zoneinfo/SystemV/YST9YDT
OLD_FILES+=usr/share/zoneinfo/SystemV/HST10
OLD_FILES+=usr/share/zoneinfo/SystemV/MST7
OLD_FILES+=usr/share/zoneinfo/SystemV/EST5
OLD_FILES+=usr/share/zoneinfo/SystemV/AST4ADT
OLD_FILES+=usr/share/zoneinfo/SystemV/CST6
OLD_FILES+=usr/share/zoneinfo/SystemV/AST4
OLD_FILES+=usr/share/doc/ntp/accopt.htm
OLD_FILES+=usr/share/doc/ntp/assoc.htm
OLD_FILES+=usr/share/doc/ntp/audio.htm
OLD_FILES+=usr/share/doc/ntp/authopt.htm
OLD_FILES+=usr/share/doc/ntp/biblio.htm
OLD_FILES+=usr/share/doc/ntp/build.htm
OLD_FILES+=usr/share/doc/ntp/clockopt.htm
OLD_FILES+=usr/share/doc/ntp/config.htm
OLD_FILES+=usr/share/doc/ntp/confopt.htm
OLD_FILES+=usr/share/doc/ntp/copyright.htm
OLD_FILES+=usr/share/doc/ntp/debug.htm
OLD_FILES+=usr/share/doc/ntp/driver1.htm
OLD_FILES+=usr/share/doc/ntp/driver10.htm
OLD_FILES+=usr/share/doc/ntp/driver11.htm
OLD_FILES+=usr/share/doc/ntp/driver12.htm
OLD_FILES+=usr/share/doc/ntp/driver16.htm
OLD_FILES+=usr/share/doc/ntp/driver18.htm
OLD_FILES+=usr/share/doc/ntp/driver19.htm
OLD_FILES+=usr/share/doc/ntp/driver2.htm
OLD_FILES+=usr/share/doc/ntp/driver20.htm
OLD_FILES+=usr/share/doc/ntp/driver22.htm
OLD_FILES+=usr/share/doc/ntp/driver23.htm
OLD_FILES+=usr/share/doc/ntp/driver24.htm
OLD_FILES+=usr/share/doc/ntp/driver26.htm
OLD_FILES+=usr/share/doc/ntp/driver27.htm
OLD_FILES+=usr/share/doc/ntp/driver28.htm
OLD_FILES+=usr/share/doc/ntp/driver29.htm
OLD_FILES+=usr/share/doc/ntp/driver3.htm
OLD_FILES+=usr/share/doc/ntp/driver30.htm
OLD_FILES+=usr/share/doc/ntp/driver32.htm
OLD_FILES+=usr/share/doc/ntp/driver33.htm
OLD_FILES+=usr/share/doc/ntp/driver34.htm
OLD_FILES+=usr/share/doc/ntp/driver35.htm
OLD_FILES+=usr/share/doc/ntp/driver36.htm
OLD_FILES+=usr/share/doc/ntp/driver37.htm
OLD_FILES+=usr/share/doc/ntp/driver4.htm
OLD_FILES+=usr/share/doc/ntp/driver5.htm
OLD_FILES+=usr/share/doc/ntp/driver6.htm
OLD_FILES+=usr/share/doc/ntp/driver7.htm
OLD_FILES+=usr/share/doc/ntp/driver8.htm
OLD_FILES+=usr/share/doc/ntp/driver9.htm
OLD_FILES+=usr/share/doc/ntp/exec.htm
OLD_FILES+=usr/share/doc/ntp/extern.htm
OLD_FILES+=usr/share/doc/ntp/gadget.htm
OLD_FILES+=usr/share/doc/ntp/hints.htm
OLD_FILES+=usr/share/doc/ntp/howto.htm
OLD_FILES+=usr/share/doc/ntp/htmlprimer.htm
OLD_FILES+=usr/share/doc/ntp/index.htm
OLD_FILES+=usr/share/doc/ntp/kern.htm
OLD_FILES+=usr/share/doc/ntp/kernpps.htm
OLD_FILES+=usr/share/doc/ntp/ldisc.htm
OLD_FILES+=usr/share/doc/ntp/measure.htm
OLD_FILES+=usr/share/doc/ntp/miscopt.htm
OLD_FILES+=usr/share/doc/ntp/monopt.htm
OLD_FILES+=usr/share/doc/ntp/mx4200data.htm
OLD_FILES+=usr/share/doc/ntp/notes.htm
OLD_FILES+=usr/share/doc/ntp/ntpd.htm
OLD_FILES+=usr/share/doc/ntp/ntpdate.htm
OLD_FILES+=usr/share/doc/ntp/ntpdc.htm
OLD_FILES+=usr/share/doc/ntp/ntpq.htm
OLD_FILES+=usr/share/doc/ntp/ntptime.htm
OLD_FILES+=usr/share/doc/ntp/ntptrace.htm
OLD_FILES+=usr/share/doc/ntp/parsedata.htm
OLD_FILES+=usr/share/doc/ntp/parsenew.htm
OLD_FILES+=usr/share/doc/ntp/patches.htm
OLD_FILES+=usr/share/doc/ntp/porting.htm
OLD_FILES+=usr/share/doc/ntp/pps.htm
OLD_FILES+=usr/share/doc/ntp/prefer.htm
OLD_FILES+=usr/share/doc/ntp/qth.htm
OLD_FILES+=usr/share/doc/ntp/quick.htm
OLD_FILES+=usr/share/doc/ntp/rdebug.htm
OLD_FILES+=usr/share/doc/ntp/refclock.htm
OLD_FILES+=usr/share/doc/ntp/release.htm
OLD_FILES+=usr/share/doc/ntp/tickadj.htm
OLD_FILES+=usr/share/doc/papers/nqnfs.ascii.gz
OLD_FILES+=usr/share/doc/papers/px.ascii.gz
OLD_FILES+=usr/share/man/man3/exp10.3.gz
OLD_FILES+=usr/share/man/man3/exp10f.3.gz
OLD_FILES+=usr/share/man/man3/fpsetsticky.3.gz
OLD_FILES+=usr/share/man/man3/gss_krb5_compat_des3_mic.3.gz
OLD_FILES+=usr/share/man/man3/gss_krb5_copy_ccache.3.gz
OLD_FILES+=usr/share/man/man3/mac_is_present_np.3.gz
OLD_FILES+=usr/share/man/man3/mbmb.3.gz
OLD_FILES+=usr/share/man/man3/setrunelocale.3.gz
OLD_FILES+=usr/share/man/man5/usbd.conf.5.gz
.if ${TARGET_ARCH} != "i386" && ${TARGET_ARCH} != "amd64"
OLD_FILES+=usr/share/man/man8/boot_i386.8.gz
.endif
.if ${TARGET_ARCH} != "powerpc" && ${TARGET_ARCH} != "powerpc64" && ${TARGET_ARCH} != "sparc64"
OLD_FILES+=usr/share/man/man8/ofwdump.8.gz
.endif
OLD_FILES+=usr/share/man/man8/mount_reiserfs.8.gz
OLD_FILES+=usr/share/man/man9/VFS_START.9.gz
OLD_FILES+=usr/share/man/man9/cpu_critical_exit.9.gz
OLD_FILES+=usr/share/man/man9/cpu_critical_enter.9.gz
OLD_FILES+=usr/share/info/annotate.info.gz
OLD_FILES+=usr/share/info/tar.info.gz
OLD_FILES+=usr/share/bsnmp/defs/tree.def
OLD_FILES+=usr/share/bsnmp/defs/mibII_tree.def
OLD_FILES+=usr/share/bsnmp/defs/netgraph_tree.def
OLD_FILES+=usr/share/bsnmp/mibs/FOKUS-MIB.txt
OLD_FILES+=usr/share/bsnmp/mibs/BEGEMOT-MIB.txt
OLD_FILES+=usr/share/bsnmp/mibs/BEGEMOT-SNMPD.txt
OLD_FILES+=usr/share/bsnmp/mibs/BEGEMOT-NETGRAPH.txt
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.x
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xbn
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xn
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xr
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xs
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xu
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xc
OLD_FILES+=usr/libdata/ldscripts/elf64_sparc.xsc
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.x
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xbn
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xn
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xr
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xs
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xu
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xc
OLD_FILES+=usr/libdata/ldscripts/elf32_sparc.xsc
OLD_FILES+=usr/libdata/msdosfs/iso22dos
OLD_FILES+=usr/libdata/msdosfs/iso72dos
OLD_FILES+=usr/libdata/msdosfs/koi2dos
OLD_FILES+=usr/libdata/msdosfs/koi8u2dos
# The following files are *not* obsolete; they just don't get touched at
# install, so don't add them:
# - boot/loader.rc
# - usr/share/tmac/man.local
# - usr/share/tmac/mm/locale
# - usr/share/tmac/mm/se_locale
# - var/yp/Makefile
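# For reference, the entries in this file are consumed by the check-old and
# delete-old targets; a quick sketch (assuming a source tree in /usr/src):
#   cd /usr/src
#   make check-old                       # report stale files/libs/dirs
#   make delete-old delete-old-libs      # remove them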
# 20071120: shared library version bump
OLD_LIBS+=usr/lib/libasn1.so.8
OLD_LIBS+=usr/lib/libgssapi.so.8
OLD_LIBS+=usr/lib/libgssapi_krb5.so.8
OLD_LIBS+=usr/lib/libhdb.so.8
OLD_LIBS+=usr/lib/libkadm5clnt.so.8
OLD_LIBS+=usr/lib/libkadm5srv.so.8
OLD_LIBS+=usr/lib/libkafs5.so.8
OLD_LIBS+=usr/lib/libkrb5.so.8
OLD_LIBS+=usr/lib/libobjc.so.2
OLD_LIBS+=usr/lib32/libgssapi.so.8
OLD_LIBS+=usr/lib32/libobjc.so.2
# 20070519: GCC 4.2
OLD_LIBS+=usr/lib/libg2c.a
OLD_LIBS+=usr/lib/libg2c.so
OLD_LIBS+=usr/lib/libg2c.so.2
OLD_LIBS+=usr/lib/libg2c_p.a
OLD_LIBS+=usr/lib/libgcc_pic.a
OLD_LIBS+=usr/lib32/libg2c.a
OLD_LIBS+=usr/lib32/libg2c.so
OLD_LIBS+=usr/lib32/libg2c.so.2
OLD_LIBS+=usr/lib32/libg2c_p.a
OLD_LIBS+=usr/lib32/libgcc_pic.a
# 20060729: OpenSSL 0.9.7e -> 0.9.8b upgrade
OLD_LIBS+=lib/libcrypto.so.4
OLD_LIBS+=usr/lib/libssl.so.4
OLD_LIBS+=usr/lib32/libcrypto.so.4
OLD_LIBS+=usr/lib32/libssl.so.4
# 20060521: gethostbyaddr(3) ABI change
OLD_LIBS+=usr/lib/libroken.so.8
OLD_LIBS+=lib/libatm.so.3
OLD_LIBS+=lib/libc.so.6
OLD_LIBS+=lib/libutil.so.5
OLD_LIBS+=usr/lib32/libatm.so.3
OLD_LIBS+=usr/lib32/libc.so.6
OLD_LIBS+=usr/lib32/libutil.so.5
# 20060413: shared library moved to /usr/lib
OLD_LIBS+=lib/libgpib.so.1
# 20060413: libpcap.so.4 moved to /lib/
OLD_LIBS+=usr/lib/libpcap.so.4
# 20060412: libpthread.so.2 moved to /lib/
OLD_LIBS+=usr/lib/libpthread.so.2
# 20060127: revert libdisk to static-only
OLD_LIBS+=usr/lib/libdisk.so.3
# 20051027: libc_r discontinued (removed 20101113)
OLD_LIBS+=usr/lib/libc_r.a
OLD_LIBS+=usr/lib/libc_r.so
OLD_LIBS+=usr/lib/libc_r.so.7
OLD_LIBS+=usr/lib/libc_r_p.a
OLD_LIBS+=usr/lib32/libc_r.a
OLD_LIBS+=usr/lib32/libc_r.so
OLD_LIBS+=usr/lib32/libc_r.so.7
OLD_LIBS+=usr/lib32/libc_r_p.a
# 20050722: bump for 6.0-RELEASE
OLD_LIBS+=lib/libalias.so.4
OLD_LIBS+=lib/libatm.so.2
OLD_LIBS+=lib/libbegemot.so.1
OLD_LIBS+=lib/libbsdxml.so.1
OLD_LIBS+=lib/libbsnmp.so.2
OLD_LIBS+=lib/libc.so.5
OLD_LIBS+=lib/libcam.so.2
OLD_LIBS+=lib/libcrypt.so.2
OLD_LIBS+=lib/libcrypto.so.3
OLD_LIBS+=lib/libdevstat.so.4
OLD_LIBS+=lib/libedit.so.4
OLD_LIBS+=lib/libgeom.so.2
OLD_LIBS+=lib/libgpib.so.0
OLD_LIBS+=lib/libipsec.so.1
OLD_LIBS+=lib/libipx.so.2
OLD_LIBS+=lib/libkiconv.so.1
OLD_LIBS+=lib/libkvm.so.2
OLD_LIBS+=lib/libm.so.3
OLD_LIBS+=lib/libmd.so.2
OLD_LIBS+=lib/libncurses.so.5
OLD_LIBS+=lib/libreadline.so.5
OLD_LIBS+=lib/libsbuf.so.2
OLD_LIBS+=lib/libufs.so.2
OLD_LIBS+=lib/libutil.so.4
OLD_LIBS+=lib/libz.so.2
OLD_LIBS+=usr/lib/libarchive.so.1
OLD_LIBS+=usr/lib/libasn1.so.7
OLD_LIBS+=usr/lib/libbluetooth.so.1
OLD_LIBS+=usr/lib/libbz2.so.1
OLD_LIBS+=usr/lib/libc_r.so.5
OLD_LIBS+=usr/lib/libcalendar.so.2
OLD_LIBS+=usr/lib/libcom_err.so.2
OLD_LIBS+=usr/lib/libdevinfo.so.2
OLD_LIBS+=usr/lib/libdialog.so.4
OLD_LIBS+=usr/lib/libfetch.so.3
OLD_LIBS+=usr/lib/libform.so.2
OLD_LIBS+=usr/lib/libftpio.so.5
OLD_LIBS+=usr/lib/libg2c.so.1
OLD_LIBS+=usr/lib/libgnuregex.so.2
OLD_LIBS+=usr/lib/libgssapi.so.7
OLD_LIBS+=usr/lib/libhdb.so.7
OLD_LIBS+=usr/lib/libhistory.so.5
OLD_LIBS+=usr/lib/libkadm5clnt.so.7
OLD_LIBS+=usr/lib/libkadm5srv.so.7
OLD_LIBS+=usr/lib/libkafs5.so.7
OLD_LIBS+=usr/lib/libkrb5.so.7
OLD_LIBS+=usr/lib/libmagic.so.1
OLD_LIBS+=usr/lib/libmenu.so.2
OLD_LIBS+=usr/lib/libmilter.so.2
OLD_LIBS+=usr/lib/libmp.so.4
OLD_LIBS+=usr/lib/libncp.so.1
OLD_LIBS+=usr/lib/libnetgraph.so.1
OLD_LIBS+=usr/lib/libngatm.so.1
OLD_LIBS+=usr/lib/libobjc.so.1
OLD_LIBS+=usr/lib/libopie.so.3
OLD_LIBS+=usr/lib/libpam.so.2
OLD_LIBS+=usr/lib/libpanel.so.2
OLD_LIBS+=usr/lib/libpcap.so.3
OLD_LIBS+=usr/lib/libpmc.so.2
OLD_LIBS+=usr/lib/libpthread.so.1
OLD_LIBS+=usr/lib/libradius.so.1
OLD_LIBS+=usr/lib/libroken.so.7
OLD_LIBS+=usr/lib/librpcsvc.so.2
OLD_LIBS+=usr/lib/libsdp.so.1
OLD_LIBS+=usr/lib/libsmb.so.1
OLD_LIBS+=usr/lib/libssh.so.2
OLD_LIBS+=usr/lib/libssl.so.3
OLD_LIBS+=usr/lib/libstdc++.so.4
OLD_LIBS+=usr/lib/libtacplus.so.1
OLD_LIBS+=usr/lib/libthr.so.1
OLD_LIBS+=usr/lib/libthread_db.so.1
OLD_LIBS+=usr/lib/libugidfw.so.1
OLD_LIBS+=usr/lib/libusbhid.so.1
OLD_LIBS+=usr/lib/libvgl.so.3
OLD_LIBS+=usr/lib/libwrap.so.3
OLD_LIBS+=usr/lib/libypclnt.so.1
OLD_LIBS+=usr/lib/pam_chroot.so.2
OLD_LIBS+=usr/lib/pam_deny.so.2
OLD_LIBS+=usr/lib/pam_echo.so.2
OLD_LIBS+=usr/lib/pam_exec.so.2
OLD_LIBS+=usr/lib/pam_ftpusers.so.2
OLD_LIBS+=usr/lib/pam_group.so.2
OLD_LIBS+=usr/lib/pam_guest.so.2
OLD_LIBS+=usr/lib/pam_krb5.so.2
OLD_LIBS+=usr/lib/pam_ksu.so.2
OLD_LIBS+=usr/lib/pam_lastlog.so.2
OLD_LIBS+=usr/lib/pam_login_access.so.2
OLD_LIBS+=usr/lib/pam_nologin.so.2
OLD_LIBS+=usr/lib/pam_opie.so.2
OLD_LIBS+=usr/lib/pam_opieaccess.so.2
OLD_LIBS+=usr/lib/pam_passwdqc.so.2
OLD_LIBS+=usr/lib/pam_permit.so.2
OLD_LIBS+=usr/lib/pam_radius.so.2
OLD_LIBS+=usr/lib/pam_rhosts.so.2
OLD_LIBS+=usr/lib/pam_rootok.so.2
OLD_LIBS+=usr/lib/pam_securetty.so.2
OLD_LIBS+=usr/lib/pam_self.so.2
OLD_LIBS+=usr/lib/pam_ssh.so.2
OLD_LIBS+=usr/lib/pam_tacplus.so.2
OLD_LIBS+=usr/lib/pam_unix.so.2
OLD_LIBS+=usr/lib/snmp_atm.so.3
OLD_LIBS+=usr/lib/snmp_mibII.so.3
OLD_LIBS+=usr/lib/snmp_netgraph.so.3
OLD_LIBS+=usr/lib/snmp_pf.so.3
# 200505XX: ?
OLD_LIBS+=usr/lib/snmp_atm.so.2
OLD_LIBS+=usr/lib/snmp_mibII.so.2
OLD_LIBS+=usr/lib/snmp_netgraph.so.2
OLD_LIBS+=usr/lib/snmp_pf.so.2
# 2005XXXX: not ready for primetime yet
OLD_LIBS+=usr/lib/libautofs.so.1
# 200411XX: libxpg4 removal
OLD_LIBS+=usr/lib/libxpg4.so.3
# 200410XX: libm compatibility fix
OLD_LIBS+=lib/libm.so.2
# 20041001: version bump
OLD_LIBS+=lib/libreadline.so.4
OLD_LIBS+=usr/lib/libhistory.so.4
OLD_LIBS+=usr/lib/libopie.so.2
OLD_LIBS+=usr/lib/libpcap.so.2
# 20040925: bind9 import
OLD_LIBS+=usr/lib/libisc.so.1
# 200408XX
OLD_LIBS+=usr/lib/snmp_netgraph.so.1
# 200404XX
OLD_LIBS+=usr/lib/libsnmp.so.1
OLD_LIBS+=usr/lib/snmp_mibII.so.1
# 200309XX
OLD_LIBS+=usr/lib/libasn1.so.6
OLD_LIBS+=usr/lib/libhdb.so.6
OLD_LIBS+=usr/lib/libkadm5clnt.so.6
OLD_LIBS+=usr/lib/libkadm5srv.so.6
OLD_LIBS+=usr/lib/libkrb5.so.6
OLD_LIBS+=usr/lib/libroken.so.6
# 200304XX
OLD_LIBS+=usr/lib/libc.so.4
OLD_LIBS+=usr/lib/libc_r.so.4
OLD_LIBS+=usr/lib/libdevstat.so.2
OLD_LIBS+=usr/lib/libedit.so.3
OLD_LIBS+=usr/lib/libgmp.so.3
OLD_LIBS+=usr/lib/libmp.so.3
OLD_LIBS+=usr/lib/libpam.so.1
OLD_LIBS+=usr/lib/libposix1e.so.2
OLD_LIBS+=usr/lib/libskey.so.2
OLD_LIBS+=usr/lib/libusbhid.so.0
OLD_LIBS+=usr/lib/libvgl.so.2
# 200302XX
OLD_LIBS+=usr/lib/libacl.so.3
OLD_LIBS+=usr/lib/libasn1.so.5
OLD_LIBS+=usr/lib/libcrypto.so.2
OLD_LIBS+=usr/lib/libgssapi.so.5
OLD_LIBS+=usr/lib/libhdb.so.5
OLD_LIBS+=usr/lib/libkadm.so.3
OLD_LIBS+=usr/lib/libkadm5clnt.so.5
OLD_LIBS+=usr/lib/libkadm5srv.so.5
OLD_LIBS+=usr/lib/libkafs.so.3
OLD_LIBS+=usr/lib/libkafs5.so.5
OLD_LIBS+=usr/lib/libkdb.so.3
OLD_LIBS+=usr/lib/libkrb.so.3
OLD_LIBS+=usr/lib/libroken.so.
OLD_LIBS+=usr/lib/libssl.so.2
OLD_LIBS+=usr/lib/pam_kerberosIV.so
# 200208XX
OLD_LIBS+=usr/lib/libgssapi.so.4
# 200203XX
OLD_LIBS+=usr/lib/libss.so.3
OLD_LIBS+=usr/lib/libusb.so.0
# 200112XX
OLD_LIBS+=usr/lib/libfetch.so.2
# 200110XX
OLD_LIBS+=usr/lib/libgssapi.so.3
# 200104XX
OLD_LIBS+=usr/lib/libdescrypt.so.2
OLD_LIBS+=usr/lib/libscrypt.so.2
# 200102XX
OLD_LIBS+=usr/lib/libcrypto.so.1
OLD_LIBS+=usr/lib/libssl.so.1
# 200009XX
OLD_LIBS+=usr/lib/libRSAglue.so.1
OLD_LIBS+=usr/lib/librsaINTL.so.1
OLD_LIBS+=usr/lib/librsaUSA.so.1
# 200006XX
OLD_LIBS+=usr/lib/libalias.so.3
OLD_LIBS+=usr/lib/libfetch.so.1
OLD_LIBS+=usr/lib/libipsec.so.0
# 200005XX
OLD_LIBS+=usr/lib/libxpg4.so.2
# 200002XX
OLD_LIBS+=usr/lib/libc.so.3
OLD_LIBS+=usr/lib/libcurses.so.2
OLD_LIBS+=usr/lib/libdialog.so.3
OLD_LIBS+=usr/lib/libedit.so.2
OLD_LIBS+=usr/lib/libf2c.so.2
OLD_LIBS+=usr/lib/libftpio.so.4
OLD_LIBS+=usr/lib/libg++.so.4
OLD_LIBS+=usr/lib/libhistory.so.3
OLD_LIBS+=usr/lib/libmytinfo.so.2
OLD_LIBS+=usr/lib/libncurses.so.3
OLD_LIBS+=usr/lib/libreadline.so.3
OLD_LIBS+=usr/lib/libss.so.2
OLD_LIBS+=usr/lib/libtermcap.so.2
OLD_LIBS+=usr/lib/libutil.so.2
OLD_LIBS+=usr/lib/libvgl.so.1
OLD_LIBS+=usr/lib/libwrap.so.2
# 199909XX
OLD_LIBS+=usr/lib/libc_r.so.3
# ???
OLD_LIBS+=usr/lib/libarchive.so.2
OLD_LIBS+=usr/lib/libbsnmp.so.1
OLD_LIBS+=usr/lib/libc_r.so.6
OLD_LIBS+=usr/lib32/libarchive.so.2
OLD_LIBS+=usr/lib32/libc_r.so.6
OLD_LIBS+=usr/lib/libcipher.so.2
OLD_LIBS+=usr/lib/libgssapi.so.6
OLD_LIBS+=usr/lib/libkse.so.1
OLD_LIBS+=usr/lib/liblwres.so.3
OLD_LIBS+=usr/lib/pam_ftp.so.2
# 20131013: Removal of the ATF tools
OLD_DIRS+=etc/atf
OLD_DIRS+=usr/share/examples/atf
OLD_DIRS+=usr/share/xml/atf
OLD_DIRS+=usr/share/xml
OLD_DIRS+=usr/share/xsl/atf
OLD_DIRS+=usr/share/xsl
# 20040925: bind9 import
OLD_DIRS+=usr/share/doc/bind/html
OLD_DIRS+=usr/share/doc/bind/misc
OLD_DIRS+=usr/share/doc/bind
# ???
OLD_DIRS+=usr/include/g++/std
OLD_DIRS+=usr/include/msdosfs
OLD_DIRS+=usr/include/ntfs
OLD_DIRS+=usr/include/nwfs
OLD_DIRS+=usr/include/ufs/mfs
# 20011001: UUCP migration to ports
OLD_DIRS+=usr/libexec/uucp
.include "tools/build/mk/OptionalObsoleteFiles.inc"
Index: head/UPDATING
===================================================================
--- head/UPDATING (revision 265924)
+++ head/UPDATING (revision 265925)
@@ -1,904 +1,907 @@
Updating Information for FreeBSD current users
This file is maintained and copyrighted by M. Warner Losh <imp@freebsd.org>.
See end of file for further details. For commonly done items, please see the
COMMON ITEMS: section later in the file. These instructions assume that you
basically know what you are doing. If not, then please consult the FreeBSD
handbook:
http://www.freebsd.org/doc/en_US.ISO8859-1/books/handbook/makeworld.html
Items affecting the ports and packages system can be found in
/usr/ports/UPDATING. Please read that file before running portupgrade.
NOTE: FreeBSD has switched from gcc to clang. If you have trouble bootstrapping
from older versions of FreeBSD, try WITHOUT_CLANG and WITH_GCC to bootstrap to
the tip of head, and then rebuild without these options. The bootstrap process
from older versions of current across the gcc/clang cutover is a bit fragile.
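A minimal sketch of such a bootstrap, assuming a source tree in /usr/src
(the knobs can equally be set in src.conf):
# cd /usr/src
# make WITHOUT_CLANG=yes WITH_GCC=yes buildworld buildkernel
Once the new world is installed and running, rebuild without these knobs.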
NOTE TO PEOPLE WHO THINK THAT FreeBSD 11.x IS SLOW:
FreeBSD 11.x has many debugging features turned on, in both the kernel
and userland. These features attempt to detect incorrect use of
system primitives, and encourage loud failure through extra sanity
checking and fail stop semantics. They also substantially impact
system performance. If you want to do performance measurement,
benchmarking, and optimization, you'll want to turn them off. This
includes various WITNESS-related kernel options, INVARIANTS, malloc
debugging flags in userland, and various verbose features in the
kernel. Many developers choose to disable these features on build
machines to maximize performance. (To completely disable malloc
debugging, define MALLOC_PRODUCTION in /etc/make.conf, or, to merely
disable the most expensive debugging functionality, run
"ln -s 'abort:false,junk:false' /etc/malloc.conf".)
+20140512:
+ Clang and llvm have been upgraded to 3.4.1 release.
+
20140508:
We bogusly installed src.opts.mk in /usr/share/mk. This file should
be removed to avoid issues in the future (and has been added to
ObsoleteFiles.inc).
20140505:
/etc/src.conf now affects only builds of the FreeBSD src tree. In the
past, it affected all builds that used the bsd.*.mk files. The old
behavior was a bug, but people may have relied upon it. To get this
behavior back, you can .include /etc/src.conf from /etc/make.conf
(which is still global and isn't changed). This also changes the
behavior of incremental builds of individual directories inside the
tree. Set MAKESYSPATH to ".../share/mk" to restore the old behavior
for such builds.
Although this has survived make universe and some upgrade scenarios,
other upgrade scenarios may have broken. At least one form of
temporary breakage was fixed with MAKESYSPATH settings for buildworld
as well...
One side effect of all this cleaning up is that bsd.compiler.mk
is no longer implicitly included by bsd.own.mk. If you wish to
use COMPILER_TYPE, you must now explicitly include bsd.compiler.mk
as well.
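As a sketch, the opt-in described above amounts to adding this line to
/etc/make.conf:
.include "/etc/src.conf"
and an incremental build of a single directory inside the tree can be
done along these lines (the exact directory is illustrative):
# cd /usr/src/bin/ls && env MAKESYSPATH=".../share/mk" make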
20140430:
The lindev device has been removed since /dev/full has been made a
standard device. __FreeBSD_version has been bumped.
20140418:
The YES_HESIOD knob has been removed. It has been obsolete for
a decade. Please move to using WITH_HESIOD instead, or your builds
will silently lack HESIOD.
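A sketch of the replacement setting, e.g. in /etc/src.conf:
WITH_HESIOD=yes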
20140405:
The uart(4) driver has been changed with respect to its handling
of the low-level console. Previously the uart(4) driver prevented
any process from changing the baudrate or the CLOCAL and HUPCL
control flags. By removing the restrictions, operators can make
changes to the serial console port without having to reboot.
However, when getty(8) is started on the serial device that is
associated with the low-level console, a misconfigured terminal
line in /etc/ttys will now have a real impact.
Before upgrading the kernel, make sure that /etc/ttys has the
serial console device configured as 3wire without baudrate to
preserve the previous behaviour. E.g.:
ttyu0 "/usr/libexec/getty 3wire" vt100 on secure
20140306:
Support for libwrap (TCP wrappers) in rpcbind was disabled by default
to improve performance. To re-enable it, if needed, run rpcbind
with the -W command-line option.
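One persistent way to do that (assuming the standard rc.conf(5) flags
variable for the rpcbind service):
	# /etc/rc.conf
	rpcbind_enable="YES"
	rpcbind_flags="-W"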
20140226:
Switched back to the GPL dtc compiler due to updates in the upstream
dts files not being supported by the BSDL dtc compiler. You will need
to rebuild your kernel toolchain to pick up the new compiler. Core dumps
may result while building dtb files during a kernel build if you fail
to do so. Set WITHOUT_GPL_DTC if you require the BSDL compiler.
20140216:
Clang and llvm have been upgraded to the 3.4 release.
20140216:
The nve(4) driver has been removed. Please use the nfe(4) driver
for NVIDIA nForce MCP Ethernet adapters instead.
20140212:
An ABI incompatibility crept into the libc++ 3.4 import in r261283.
This could cause certain C++ applications using shared libraries built
against the previous version of libc++ to crash. The incompatibility
has now been fixed, but any C++ applications or shared libraries built
between r261283 and r261801 should be recompiled.
20140204:
OpenSSH will now ignore errors caused by a kernel lacking Capsicum
capability mode support. Please note that enabling the feature in
the kernel is still highly recommended.
20140131:
OpenSSH is now built with sandbox support, and will use sandbox as
the default privilege separation method. This requires Capsicum
capability mode support in the kernel.
20140128:
The libelf and libdwarf libraries have been updated to newer
versions from upstream. Shared library version numbers for
these two libraries were bumped. Any ports or binaries
requiring these two libraries should be recompiled.
__FreeBSD_version is bumped to 1100006.
20140110:
If a Makefile in a tests/ directory was auto-generating a Kyuafile
instead of providing an explicit one, this would prevent such a
Makefile from providing its own Kyuafile in the future during
NO_CLEAN builds. This has been fixed in the Makefiles but manual
intervention is needed to clean an objdir if you use NO_CLEAN:
# find /usr/obj -name Kyuafile | xargs rm -f
20131213:
The behavior of gss_pseudo_random() for the krb5 mechanism
has changed, for applications requesting a longer random string
than produced by the underlying enctype's pseudo-random() function.
In particular, the random string produced from a session key of
enctype aes256-cts-hmac-sha1-96 will be different at the 17th octet
and later, after this change.
The counter used in the PRF+ construction is now encoded as a
big-endian integer in accordance with RFC 4402.
__FreeBSD_version is bumped to 1100004.
20131108:
The WITHOUT_ATF build knob has been removed and its functionality
has been subsumed into the more generic WITHOUT_TESTS. If you were
using the former to disable the build of the ATF libraries, you
should change your settings to use the latter.
20131025:
The default version of mtree is nmtree which is obtained from
NetBSD. The output is generally the same, but may vary
slightly. If you find you need identical output, adding
"-F freebsd9" to the command line should do the trick. For the
time being, the old mtree is available as fmtree.
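For example, to produce freebsd9-style output when creating a spec (a
sketch; -c and -p are standard mtree(8) flags):
	mtree -F freebsd9 -c -p /usr/include > include.spec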
20131014:
libbsdyml has been renamed to libyaml and moved to /usr/lib/private.
This will break ports-mgmt/pkg. Rebuild the port, or upgrade to pkg
1.1.4_8 and verify that bsdyml is not linked in, before running "make
delete-old-libs":
# make -C /usr/ports/ports-mgmt/pkg build deinstall install clean
or
# pkg install pkg; ldd /usr/local/sbin/pkg | grep bsdyml
20131010:
The rc.d/jail script has been updated to support the jail(8)
configuration file. The "jail_<jname>_*" rc.conf(5) variables
for per-jail configuration are automatically converted to
/var/run/jail.<jname>.conf before the jail(8) utility is invoked.
This is transparently backward compatible. See below about some
incompatibilities, and the rc.conf(5) manual page for more details.
These variables are now deprecated in favor of the jail(8) configuration
file. One can use "rc.d/jail config <jname>" command to generate
a jail(8) configuration file in /var/run/jail.<jname>.conf without
running the jail(8) utility. The default pathname of the
configuration file is /etc/jail.conf and can be specified by
using $jail_conf or $jail_<jname>_conf variables.
Please note that jail_devfs_ruleset accepts only an integer at
this moment, so consider rewriting any named ruleset as its
integer value.
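As an illustration, a generated /var/run/jail.myjail.conf might look
like the following (hypothetical jail name and values):
	myjail {
		path = "/usr/jail/myjail";
		host.hostname = "myjail.example.org";
		ip4.addr = 192.0.2.10;
		devfs_ruleset = 4;	# an integer, per the note above
	}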
20130930:
BIND has been removed from the base system. If all you need
is a local resolver, simply enable and start the local_unbound
service instead. Otherwise, several versions of BIND are
available in the ports tree. The dns/bind99 port is one example.
With this change, nslookup(1) and dig(1) are no longer in the base
system. Users should instead use host(1) and drill(1) which are
in the base system. Alternatively, nslookup and dig can
be obtained by installing the dns/bind-tools port.
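For example, to switch to the local resolver:
	echo 'local_unbound_enable="YES"' >> /etc/rc.conf
	service local_unbound start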
20130916:
With the addition of unbound(8), a new unbound user is now
required during installworld. "mergemaster -p" can be used to
add the user prior to installworld, as documented in the handbook.
20130911:
OpenSSH is now built with DNSSEC support, and will by default
silently trust signed SSHFP records. This can be controlled with
the VerifyHostKeyDNS client configuration setting. DNSSEC support
can be disabled entirely with the WITHOUT_LDNS option in src.conf.
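For example, to be prompted instead of silently trusting SSHFP
records, set the following in ~/.ssh/config or /etc/ssh/ssh_config:
	VerifyHostKeyDNS ask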
20130906:
The GNU Compiler Collection and C++ standard library (libstdc++)
are no longer built by default on platforms where clang is the system
compiler. You can enable them with the WITH_GCC and WITH_GNUCXX
options in src.conf.
20130905:
The PROCDESC kernel option is now part of the GENERIC kernel
configuration and is required for rwhod(8) to work.
If you are using a custom kernel configuration, you should include
'options PROCDESC'.
20130905:
The API and ABI related to the Capsicum framework were modified
in a backward-incompatible way. The userland libraries and programs
have to be recompiled to work with the new kernel. This includes the
following libraries and programs, but a full buildworld is
advised: libc, libprocstat, dhclient, tcpdump, hastd, hastctl,
kdump, procstat, rwho, rwhod, uniq.
20130903:
AES-NI intrinsic support has been added to gcc. The AES-NI module
has been updated to use this support. A new gcc is required to build
the aesni module on both i386 and amd64.
20130821:
The PADLOCK_RNG and RDRAND_RNG kernel options are now devices.
Thus "device padlock_rng" and "device rdrand_rng" should be
used instead of "options PADLOCK_RNG" & "options RDRAND_RNG".
20130813:
WITH_ICONV has been split into two feature sets. WITH_ICONV now
enables just the iconv* functionality and is now on by default.
WITH_LIBICONV_COMPAT enables the libiconv API and link-time
compatibility. Set WITHOUT_ICONV to build the old way.
If you have been using WITH_ICONV before, you will very likely
need to turn on WITH_LIBICONV_COMPAT.
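For example, in /etc/src.conf (a sketch):
	WITH_LIBICONV_COMPAT=yes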
20130806:
The INVARIANTS option now enables DEBUG for code with OpenSolaris and
Illumos origin, including ZFS. If you have INVARIANTS in your
kernel configuration, then there is no need to set DEBUG or ZFS_DEBUG
explicitly.
DEBUG used to enable witness(9) tracking of OpenSolaris (mostly ZFS)
locks if the WITNESS option was set. Because that generated a lot of
witness(9) reports and all of them were believed to be false
positives, this is no longer done. New option OPENSOLARIS_WITNESS
can be used to achieve the previous behavior.
20130806:
Timer values in IPv6 data structures now use time_uptime instead
of time_second. Although this is not a user-visible functional
change, userland utilities which directly use them---ndp(8),
rtadvd(8), and rtsold(8) in the base system---need to be updated
to r253970 or later.
20130802:
find -delete can now delete the pathnames given as arguments,
instead of only files found below them or if the pathname did
not contain any slashes. Formerly, the following error message
would result:
find: -delete: <path>: relative path potentially not safe
Deleting the pathnames given as arguments can be prevented
without error messages using -mindepth 1 or by changing
directory and passing "." as argument to find. This works in the
old as well as the new version of find.
20130726:
Behavior of devfs rules path matching has been changed.
The pattern is now always matched against the fully qualified devfs
path, and slash characters must be explicitly matched by
slashes in the pattern (FNM_PATHNAME). Rulesets involving devfs
subdirectories must be reviewed.
20130716:
The default ARM ABI has changed to the ARM EABI. The old ABI is
incompatible with the ARM EABI and all programs and modules will
need to be rebuilt to work with a new kernel.
To keep using the old ABI, ensure the WITHOUT_ARM_EABI knob is set
(see the example below).
NOTE: Support for the old ABI will be removed in the future and
users are advised to upgrade.
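For example, in /etc/src.conf:
	WITHOUT_ARM_EABI=yes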
20130709:
pkg_install has been disconnected from the build. If you really need
it, add WITH_PKGTOOLS to your src.conf(5).
20130709:
Most of the network statistics structures were changed to be able
to keep 64-bit counters. Thus all tools that work with networking
statistics must be rebuilt (netstat(1), bsnmpd(1), etc.)
20130629:
Fix targets that run multiple makes to use && rather than ;
so that subsequent steps depend on the success of previous ones.
NOTE: if building 'universe' with -j* on stable/8 or stable/9
it would be better to start the build using bmake, to avoid
overloading the machine.
20130618:
Fix a bug that allowed a tracing process (e.g. gdb) to write
to a memory-mapped file in the traced process's address space
even if neither the traced process nor the tracing process had
write access to that file.
20130615:
CVS has been removed from the base system. An exact copy
of the code is available from the devel/cvs port.
20130613:
Some people report the following error after the switch to bmake:
make: illegal option -- J
usage: make [-BPSXeiknpqrstv] [-C directory] [-D variable]
...
*** [buildworld] Error code 2
This is likely due to an old instance of make in
${MAKEPATH} (${MAKEOBJDIRPREFIX}${.CURDIR}/make.${MACHINE}),
which src/Makefile will use blindly if it exists. If you
see the above error:
rm -rf `make -V MAKEPATH`
should resolve it.
20130516:
Use bmake by default.
Whereas before one could choose to build with bmake via
-DWITH_BMAKE one must now use -DWITHOUT_BMAKE to use the old
make. The goal is to remove these knobs for 10-RELEASE.
It is worth noting that bmake (like gmake) treats the command
line as the unit of failure, rather than statements within the
command line. Thus '(cd some/where && dosomething)' is safer
than 'cd some/where; dosomething'. The '()' allows consistent
behavior in parallel build.
20130429:
Fix a bug that allowed NFS clients to issue READDIR on files.
20130426:
The WITHOUT_IDEA option has been removed because
the IDEA patent expired.
20130426:
The sysctl which controls TRIM support under ZFS has been renamed
from vfs.zfs.trim_disable -> vfs.zfs.trim.enabled and has been
enabled by default.
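To turn TRIM back off, one option (assuming the sysctl is also
settable as a loader tunable) is:
	# /boot/loader.conf
	vfs.zfs.trim.enabled=0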
20130425:
The mergemaster command now uses the default MAKEOBJDIRPREFIX
rather than creating its own in the temporary directory, in
order to allow access to bootstrapped versions of tools such as
install and mtree. When upgrading from a version of FreeBSD where
the install command does not support -l, you will need to
install a new mergemaster command if mergemaster -p is required.
This can be accomplished with the command (cd src/usr.sbin/mergemaster
&& make install).
20130404:
The legacy ATA stack, disabled and replaced by the new CAM-based one
since FreeBSD 9.0, has been completely removed from the sources. The
kernel modules atadisk and atapi*, and the user-level tools atacontrol
and burncd, have been removed. The kernel option `options ATA_CAM` is
now permanently enabled and has been removed.
20130319:
SOCK_CLOEXEC and SOCK_NONBLOCK flags have been added to socket(2)
and socketpair(2). Software, in particular Kerberos, may
automatically detect and use these during building. The resulting
binaries will not work on older kernels.
20130308:
CTL_DISABLE has also been added to the sparc64 GENERIC (for further
information, see the respective 20130304 entry).
20130304:
Recent commits to callout(9) changed the size of struct callout,
so the KBI is probably heavily disturbed. Also, some functions
in callout(9)/sleep(9)/sleepqueue(9)/condvar(9) KPIs were replaced
by macros. Every kernel module using them will fail to load, so a
rebuild is required.
The ctl device has been re-enabled in GENERIC for i386 and amd64,
but does not initialize by default (because of the new CTL_DISABLE
option) to save memory. To re-enable it, remove the CTL_DISABLE
option from the kernel config file or set kern.cam.ctl.disable=0
in /boot/loader.conf.
20130301:
The ctl device has been disabled in GENERIC for i386 and amd64.
This was done due to the extra memory being allocated at system
initialisation time by the ctl driver which was only used if
a CAM target device was created. This makes a FreeBSD system
unusable on 128MB or less of RAM.
20130208:
A new compression method (lz4) has been merged to -HEAD. Please
refer to zpool-features(7) for more information.
Please refer to the "ZFS notes" section of this file for information
on upgrading boot ZFS pools.
20130129:
A BSD-licensed patch(1) variant has been added and is installed
as bsdpatch, with the GNU version remaining the default patch.
To invert the logic and use the BSD-licensed one as the default,
while having the GNU version installed as gnupatch, rebuild
and install world with the WITH_BSD_PATCH knob set.
20130121:
Due to the use of the new -l option to install(1) during build
and install, you must take care not to directly set the INSTALL
make variable in your /etc/make.conf, /etc/src.conf, or on the
command line. If you wish to use the -C flag for all installs
you may be able to add INSTALL+=-C to /etc/make.conf or
/etc/src.conf.
20130118:
The install(1) option -M has changed meaning and now takes an
argument that is a file or path to append logs to. In the
unlikely event that -M was the last option on the command line
and the command line contained at least two files and a target
directory the first file will have logs appended to it. The -M
option served little practical purpose in the last decade so its
use is expected to be extremely rare.
20121223:
After switching to Clang as the default compiler some users of ZFS
on i386 systems started to experience stack overflow kernel panics.
Please consider using 'options KSTACK_PAGES=4' in such configurations.
20121222:
GEOM_LABEL now mangles label names read from file system metadata.
Mangling affects labels containing spaces, non-printable characters,
'%' or '"'. Device names in /etc/fstab and other places may need to
be updated.
20121217:
By default, only the 10 most recent kernel dumps will be saved. To
restore the previous behaviour (no limit on the number of kernel dumps
stored in the dump directory) add the following line to /etc/rc.conf:
savecore_flags=""
20121201:
With the addition of auditdistd(8), a new auditdistd user is now
required during installworld. "mergemaster -p" can be used to
add the user prior to installworld, as documented in the handbook.
20121117:
The sin6_scope_id member variable in struct sockaddr_in6 is now
filled by the kernel before passing the structure to the userland via
sysctl or routing socket. This means the KAME-specific embedded scope
id in sin6_addr.s6_addr[2] is always cleared in userland applications.
This behavior can be controlled by net.inet6.ip6.deembed_scopeid.
__FreeBSD_version is bumped to 1000025.
20121105:
On i386 and amd64 systems WITH_CLANG_IS_CC is now the default.
This means that the world and kernel will be compiled with clang
and that clang will be installed as /usr/bin/cc, /usr/bin/c++,
and /usr/bin/cpp. To disable this behavior and revert to building
with gcc, compile with WITHOUT_CLANG_IS_CC. Really old versions
of current may need to bootstrap WITHOUT_CLANG first if the clang
build fails (its compatibility window doesn't extend to the 9 stable
branch point).
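For example, to keep building with gcc, in /etc/src.conf:
	WITHOUT_CLANG_IS_CC=yes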
20121102:
The IPFIREWALL_FORWARD kernel option has been removed. Its
functionality is now turned on by default.
20121023:
The ZERO_COPY_SOCKET kernel option has been removed and
split into SOCKET_SEND_COW and SOCKET_RECV_PFLIP.
NB: SOCKET_SEND_COW uses the VM page based copy-on-write
mechanism which is not safe and may result in kernel crashes.
NB: The SOCKET_RECV_PFLIP mechanism is useless as no current
driver supports disposable external page-sized mbuf storage.
Proper replacements for both zero-copy mechanisms are under
consideration and will eventually lead to complete removal
of the two kernel options.
20121023:
The IPv4 network stack has been converted to network byte
order. The following modules need to be recompiled together
with the kernel: carp(4), divert(4), gif(4), siftr(4), gre(4),
pf(4), ipfw(4), ng_ipfw(4), stf(4).
20121022:
Support for non-MPSAFE filesystems was removed from VFS. The
VFS_VERSION was bumped; all filesystem modules must be
recompiled.
20121018:
All the non-MPSAFE filesystems have been disconnected from
the build. The full list includes: codafs, hpfs, ntfs, nwfs,
portalfs, smbfs, xfs.
20121016:
The interface cloning API and ABI has changed. The following
modules need to be recompiled together with the kernel:
ipfw(4), pfsync(4), pflog(4), usb(4), wlan(4), stf(4),
vlan(4), disc(4), edsc(4), if_bridge(4), gif(4), tap(4),
faith(4), epair(4), enc(4), tun(4), if_lagg(4), gre(4).
20121015:
The sdhci driver was split in two parts: sdhci (generic SD Host
Controller logic) and sdhci_pci (actual hardware driver).
No kernel config modifications are required, but if you
load sdhci as a module you must switch to sdhci_pci instead.
20121014:
The FUSE kernel and userland support has been imported into the
base system.
20121013:
The GNU sort(1) program has been removed since the BSD-licensed
sort(1) has been the default for quite some time and no serious
problems have been reported. The corresponding WITH_GNU_SORT
knob has also gone.
20121006:
The pfil(9) API/ABI for the AF_INET family has been changed. Packet
filtering modules pf(4), ipfw(4), and ipfilter(4) need to be
recompiled with the new kernel.
20121001:
The net80211(4) ABI has been changed to allow for improved driver
PS-POLL and power-save support. All wireless drivers need to be
recompiled to work with the new kernel.
20120913:
The random(4) support for the VIA hardware random number
generator (`PADLOCK') is no longer enabled unconditionally.
Add the padlock_rng device in the custom kernel config if
needed. The GENERIC kernels on i386 and amd64 do include the
device, so the change only affects the custom kernel
configurations.
20120908:
The pf(4) packet filter ABI has been changed. pfctl(8) and
the snmp_pf module need to be recompiled to work with the new kernel.
20120828:
A new ZFS feature flag "com.delphix:empty_bpobj" has been merged
to -HEAD. Pools that have empty_bpobj in active state can not be
imported read-write with ZFS implementations that do not support
this feature. For more information read the zpool-features(5)
manual page.
20120727:
The sparc64 ZFS loader has been changed to no longer try to auto-
detect ZFS providers based on diskN aliases but now requires these
to be explicitly listed in the OFW boot-device environment variable.
20120712:
OpenSSL has been upgraded to 1.0.1c. Any binaries requiring
libcrypto.so.6 or libssl.so.6 must be recompiled. Also, there are
configuration changes. Make sure to merge /etc/ssl/openssl.cnf.
20120712:
The following sysctls and tunables have been renamed for consistency
with other variables:
kern.cam.da.da_send_ordered -> kern.cam.da.send_ordered
kern.cam.ada.ada_send_ordered -> kern.cam.ada.send_ordered
20120628:
The sort utility has been replaced with BSD sort. For now, GNU sort
is also available as "gnusort" or the default can be set back to
GNU sort by setting WITH_GNU_SORT. In this case, BSD sort will be
installed as "bsdsort".
20120611:
A new version of ZFS (pool version 5000) has been merged to -HEAD.
Starting with this version the old system of ZFS pool versioning
is superseded by "feature flags". This concept enables forward
compatibility against certain future changes in functionality of ZFS
pools. The first read-only compatible "feature flag" for ZFS pools
is named "com.delphix:async_destroy". For more information
read the new zpool-features(5) manual page.
Please refer to the "ZFS notes" section of this file for information
on upgrading boot ZFS pools.
20120417:
The malloc(3) implementation embedded in libc now uses sources imported
as contrib/jemalloc. The most disruptive API change is to
/etc/malloc.conf. If your system has an old-style /etc/malloc.conf,
delete it prior to installworld, and optionally re-create it using the
new format after rebooting. See malloc.conf(5) for details
(specifically the TUNING section and the "opt.*" entries in the MALLCTL
NAMESPACE section).
20120328:
Big-endian MIPS TARGET_ARCH values no longer end in "eb". mips64eb
is now spelled mips64. mipsn32eb is now spelled mipsn32. mipseb is
now spelled mips. This is to aid compatibility with third-party
software that expects this naming scheme in uname(3). Little-endian
settings are unchanged. If you are updating a big-endian mips64 machine
from before this change, you may need to set MACHINE_ARCH=mips64 in
your environment before the new build system will recognize your machine.
20120306:
Disable by default the option VFS_ALLOW_NONMPSAFE for all supported
platforms.
20120229:
UNIX domain sockets now behave "as expected" on nullfs(5). Previously
nullfs(5) did not pass all behaviours through to the underlying layer;
as a result, a socket bound on the lower layer could be connected to
only via the lower path, and a socket bound on the upper layer only
via the upper path. Now one can connect to both the lower and the
upper paths regardless of which layer's path one binds to.
20120211:
The getifaddrs upgrade path broken with 20111215 has been restored.
If you upgraded between 20111215 and 20120209, you need to
recompile libc again against your kernel. You still need to recompile
world to be able to configure CARP, but this restriction already
comes from 20111215.
20120114:
The set_rcvar() function has been removed from /etc/rc.subr. All
base and ports rc.d scripts have been updated, so if you have a
port installed with a script in /usr/local/etc/rc.d you can either
hand-edit the rcvar= line, or reinstall the port.
An easy way to handle the mass-update of /etc/rc.d:
rm /etc/rc.d/* && mergemaster -i
20120109:
panic(9) now stops other CPUs in SMP systems, disables interrupts
on the current CPU and prevents other threads from running.
This behavior can be reverted using the kern.stop_scheduler_on_panic
tunable/sysctl.
The new behavior can be incompatible with kern.sync_on_panic.
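For example, to restore the old behavior:
	# /boot/loader.conf (or at runtime via sysctl(8))
	kern.stop_scheduler_on_panic=0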
20111215:
The carp(4) facility has been changed significantly. Configuration
of the CARP protocol via ifconfig(8) has changed, as has the format
of CARP events submitted to devd(8). See the manual pages
for more information. The arpbalance feature of carp(4) is
no longer supported.
The sizes of struct in_aliasreq and struct in6_aliasreq have changed. User
utilities using SIOCAIFADDR, SIOCAIFADDR_IN6, e.g. ifconfig(8),
need to be recompiled.
20111122:
The acpi_wmi(4) status device /dev/wmistat has been renamed to
/dev/wmistat0.
20111108:
The VFS_ALLOW_NONMPSAFE option has been added in order to
explicitly support non-MPSAFE filesystems.
It is on by default for all supported platforms at this time.
20111101:
The broken amd(4) driver has been replaced with esp(4) in the amd64,
i386 and pc98 GENERIC kernel configuration files.
20110930:
sysinstall has been removed.
20110923:
The stable/9 branch has been created in Subversion. This corresponds to the
RELENG_9 branch in CVS.
COMMON ITEMS:
General Notes
-------------
Avoid using make -j when upgrading. While generally safe, there are
sometimes problems using -j to upgrade. If your upgrade fails with
-j, please try again without -j. From time to time in the past there
have been problems using -j with buildworld and/or installworld. This
is especially true when upgrading between "distant" versions (e.g. ones
that cross a major release boundary or several minor releases, or when
several months have passed on the -current branch).
Sometimes, obscure build problems are the result of environment
poisoning. This can happen because the make utility reads its
environment when searching for values for global variables. To run
your build attempts in an "environmental clean room", prefix all make
commands with 'env -i '. See the env(1) manual page for more details.
When upgrading from one major version to another it is generally best
to upgrade to the latest code in the currently installed branch first,
then do an upgrade to the new branch. This is the best-tested upgrade
path, and has the highest probability of being successful. Please try
this approach before reporting problems with a major version upgrade.
When upgrading a live system, having a root shell around before
installing anything can help undo problems. Not having a root shell
around can lead to problems if pam has changed too much from your
starting point to allow continued authentication after the upgrade.
ZFS notes
---------
When upgrading the boot ZFS pool to a new version, always follow
these two steps:
1.) recompile and reinstall the ZFS boot loader and boot block
(this is part of "make buildworld" and "make installworld")
2.) update the ZFS boot block on your boot drive
The following example updates the ZFS boot block on the first
partition (freebsd-boot) of a GPT partitioned drive ad0:
"gpart bootcode -p /boot/gptzfsboot -i 1 ad0"
Non-boot pools do not need these updates.
To build a kernel
-----------------
If you are updating from a prior version of FreeBSD (even one just
a few days old), you should follow this procedure. It is the most
failsafe as it uses a /usr/obj tree with a fresh mini-buildworld:
make kernel-toolchain
make -DALWAYS_CHECK_MAKE buildkernel KERNCONF=YOUR_KERNEL_HERE
make -DALWAYS_CHECK_MAKE installkernel KERNCONF=YOUR_KERNEL_HERE
To test a kernel once
---------------------
If you just want to boot a kernel once (because you are not sure
if it works, or if you want to boot a known bad kernel to provide
debugging information) run
make installkernel KERNCONF=YOUR_KERNEL_HERE KODIR=/boot/testkernel
nextboot -k testkernel
To just build a kernel when you know that it won't mess you up
--------------------------------------------------------------
This assumes you are already running a CURRENT system. Replace
${arch} with the architecture of your machine (e.g. "i386",
"arm", "amd64", "ia64", "pc98", "sparc64", "powerpc", "mips", etc).
cd src/sys/${arch}/conf
config KERNEL_NAME_HERE
cd ../compile/KERNEL_NAME_HERE
make depend
make
make install
If this fails, go to the "To build a kernel" section.
To rebuild everything and install it on the current system.
-----------------------------------------------------------
# Note: sometimes, if you are running current, you may have to do more
# than is listed here if you are upgrading from a really old current.
<make sure you have good level 0 dumps>
make buildworld
make kernel KERNCONF=YOUR_KERNEL_HERE
[1]
<reboot in single user> [3]
mergemaster -Fp [5]
make installworld
mergemaster -Fi [4]
make delete-old [6]
<reboot>
To cross-install current onto a separate partition
--------------------------------------------------
# In this approach we use a separate partition to hold
# current's root, 'usr', and 'var' directories. A partition
# holding "/", "/usr" and "/var" should be about 2GB in
# size.
<make sure you have good level 0 dumps>
<boot into -stable>
make buildworld
make buildkernel KERNCONF=YOUR_KERNEL_HERE
<maybe newfs current's root partition>
<mount current's root partition on directory ${CURRENT_ROOT}>
make installworld DESTDIR=${CURRENT_ROOT} -DDB_FROM_SRC
make distribution DESTDIR=${CURRENT_ROOT} # if newfs'd
make installkernel KERNCONF=YOUR_KERNEL_HERE DESTDIR=${CURRENT_ROOT}
cp /etc/fstab ${CURRENT_ROOT}/etc/fstab # if newfs'd
<edit ${CURRENT_ROOT}/etc/fstab to mount "/" from the correct partition>
<reboot into current>
<do a "native" rebuild/install as described in the previous section>
<maybe install compatibility libraries from ports/misc/compat*>
<reboot>
To upgrade in-place from stable to current
------------------------------------------
<make sure you have good level 0 dumps>
make buildworld [9]
make kernel KERNCONF=YOUR_KERNEL_HERE [8]
[1]
<reboot in single user> [3]
mergemaster -Fp [5]
make installworld
mergemaster -Fi [4]
make delete-old [6]
<reboot>
Make sure that you've read the UPDATING file to understand the
tweaks to various things you need. At this point in the life
cycle of current, things change often and you are on your own
to cope. The defaults can also change, so please read ALL of
the UPDATING entries.
Also, if you are tracking -current, you must be subscribed to
freebsd-current@freebsd.org. Make sure that before you update
your sources that you have read and understood all the recent
messages there. If in doubt, please track -stable, which has
far fewer pitfalls.
[1] If you have third party modules, such as vmware, you
should disable them at this point so they don't crash your
system on reboot.
[3] From the bootblocks, boot -s, and then do
fsck -p
mount -u /
mount -a
cd src
adjkerntz -i # if CMOS is wall time
Also, when doing a major release upgrade, it is required that
you boot into single user mode to do the installworld.
[4] Note: This step is non-optional. Failure to do this step
can result in a significant reduction in the functionality of the
system. Attempting to do it by hand is not recommended and those
that pursue this avenue should read this file carefully, as well
as the archives of freebsd-current and freebsd-hackers mailing lists
for potential gotchas. The -U option is also useful to consider.
See mergemaster(8) for more information.
[5] Usually this step is a noop. However, from time to time
you may need to do this if you get unknown user in the following
step. It never hurts to do it all the time. You may need to
install a new mergemaster (cd src/usr.sbin/mergemaster && make
install) after the buildworld before this step if you last updated
from current before 20130425 or from -stable before 20130430.
[6] This only deletes old files and directories. Old libraries
can be deleted by "make delete-old-libs", but you have to make
sure that no program is using those libraries anymore.
[8] In order to have a kernel that can run the 4.x binaries needed to
do an installworld, you must include the COMPAT_FREEBSD4 option in
your kernel. Failure to do so may leave you with a system that is
hard to boot to recover. A similar kernel option COMPAT_FREEBSD5 is
required to run the 5.x binaries on more recent kernels. And so on
for COMPAT_FREEBSD6 and COMPAT_FREEBSD7.
Make sure that you merge any new devices from GENERIC since the
last time you updated your kernel config file.
[9] When checking out sources, you must include the -P flag to have
cvs prune empty directories.
If CPUTYPE is defined in your /etc/make.conf, make sure to use the
"?=" instead of the "=" assignment operator, so that buildworld can
override the CPUTYPE if it needs to.
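For example (hypothetical CPU type):
	# /etc/make.conf
	CPUTYPE?=core2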
MAKEOBJDIRPREFIX must be defined in an environment variable, and
not on the command line, or in /etc/make.conf. buildworld will
warn if it is improperly defined.
FORMAT:
This file contains a list, in reverse chronological order, of major
breakages in tracking -current. It is not guaranteed to be a complete
list of such breakages, and only contains entries since October 10, 2007.
If you need to see UPDATING entries from before that date, you will need
to fetch an UPDATING file from an older FreeBSD release.
Copyright information:
Copyright 1998-2009 M. Warner Losh. All Rights Reserved.
Redistribution, publication, translation and use, with or without
modification, in full or in part, in any form or format of this
document are permitted without further permission from the author.
THIS DOCUMENT IS PROVIDED BY WARNER LOSH ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL WARNER LOSH BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Contact Warner Losh if you have any questions about your use of
this document.
$FreeBSD$
Index: head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td (revision 265924)
+++ head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td (revision 265925)
@@ -1,3125 +1,3125 @@
//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the X86-specific intrinsics.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Interrupt traps
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
}
//===----------------------------------------------------------------------===//
// 3DNow!
let TargetPrefix = "x86" in {
def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// 3DNow! extensions
let TargetPrefix = "x86" in {
def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnowa_pswapd :
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// SSE1
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
}
// Comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse_cmp_ps : GCCBuiltin<"__builtin_ia32_cmpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
}
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvtsi2ss : GCCBuiltin<"__builtin_ia32_cvtsi2ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i64_ty], [IntrNoMem]>;
def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
// SIMD store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v4f32_ty], [IntrReadWriteArgMem]>;
}
// Cacheability support ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
Intrinsic<[], [], []>;
}
// Control register.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_stmxcsr :
Intrinsic<[], [llvm_ptr_ty], []>;
def int_x86_sse_ldmxcsr :
Intrinsic<[], [llvm_ptr_ty], []>;
}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// SSE2
// FP arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// FP comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse2_cmp_pd : GCCBuiltin<"__builtin_ia32_cmppd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
}
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi128_byteshift">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi128_byteshift">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsi2sd : GCCBuiltin<"__builtin_ia32_cvtsi2sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtss2sd : GCCBuiltin<"__builtin_ia32_cvtss2sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
// SIMD store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v2f64_ty], [IntrReadWriteArgMem]>;
def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v16i8_ty], [IntrReadWriteArgMem]>;
def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v4i32_ty], [IntrReadWriteArgMem]>;
}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
Intrinsic<[], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_ptr_ty], []>;
def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
Intrinsic<[], [llvm_ptr_ty], []>;
def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
Intrinsic<[], [], []>;
def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
Intrinsic<[], [], []>;
}
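// The empty property lists on the fences are deliberate: with no IntrNoMem or
// IntrReadMem marking, the calls are treated as having unmodeled side effects,
// so the optimizer will not reorder memory operations across, for example:
//   call void @llvm.x86.sse2.mfence()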
//===----------------------------------------------------------------------===//
// SSE3
// Addition / subtraction ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// Horizontal ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// Specialized unaligned load.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
// Thread synchronization ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
Intrinsic<[], [llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], []>;
def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
Intrinsic<[], [llvm_i32_ty,
llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
// SSSE3
// Horizontal arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
}
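// Note the asymmetric pmaddubsw types: each byte of the first (unsigned)
// source is multiplied by the corresponding byte of the second (signed)
// source, and adjacent product pairs are summed with signed saturation,
// which is why the 128-bit form maps two v16i8 inputs to one v8i16 result.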
// Packed multiply high with round and scale
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
}
// Shuffle ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
[IntrNoMem]>;
}
// Sign ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
}
// Absolute value ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// SSE4.1
// FP rounding ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
}
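// A sketch of the rounding immediate, assuming a vector %x is in scope:
// bits 1:0 pick the mode (00 nearest, 01 down, 10 up, 11 truncate), bit 2
// defers to MXCSR.RC instead, and bit 3 suppresses precision exceptions.
//   %f = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %x, i32 9)
//   ; imm 9 = 0b1001: round toward -inf, precision exceptions suppressed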
// Vector sign and zero extend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
}
// Vector min element
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
}
// Vector compare, min, max
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, Commutative]>;
}
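// The Commutative flag records that the two sources may be swapped, e.g. so
// instruction selection can fold a load into either operand. A sample call,
// assuming %a and %b are in scope:
//   %m = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a, <16 x i8> %b)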
// Advanced Encryption Standard (AES) Instructions
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aeskeygenassist :
GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
}
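// A minimal sketch of one encryption round built from the defs above,
// assuming <2 x i64> values %state and %rk (the round key) are in scope:
//   %s1 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %state, <2 x i64> %rk)
// The final round uses llvm.x86.aesni.aesenclast instead; all of these are
// IntrNoMem, i.e. pure value-to-value transforms with no memory access.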
// PCLMUL instruction
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
}
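// The trailing i8 immediate picks which 64-bit half of each source enters the
// carry-less multiply: bit 0 selects the qword of the first operand and bit 4
// that of the second. For example, multiplying both high halves:
//   %p = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a, <2 x i64> %b, i8 17)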
// Vector pack
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
}
// Vector multiply
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
}
// Vector extract
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pextrb :
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse41_pextrd :
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse41_pextrq :
Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse41_extractps : GCCBuiltin<"__builtin_ia32_extractps128">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Vector insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
}
// Vector dot product
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
}
// Vector sum of absolute differences
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
}
// Cacheability support ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
// Test instruction with bitwise comparison.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
}
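// These return the flag PTEST computes, materialized as i32 0 or 1: ptestz
// yields ZF (set when a AND b is all zeros), ptestc yields CF (set when
// NOT(a) AND b is all zeros), and ptestnzc is set when neither flag is.
// A sketch, assuming %a and %b are in scope:
//   %z = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a, <2 x i64> %b)
//   %disjoint = icmp ne i32 %z, 0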
//===----------------------------------------------------------------------===//
// SSE4.2
// Miscellaneous
// CRC Instruction
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse42_crc32_32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_crc32_32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
}
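// These accumulate a CRC-32C checksum (Castagnoli polynomial 0x1EDC6F41), so
// a buffer is processed by threading the running value through successive
// calls; a two-byte sketch, assuming %c0, %b0 and %b1 are in scope:
//   %c1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %c0, i8 %b0)
//   %c2 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %c1, i8 %b1)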
// String/text processing ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem]>;
}
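// Naming key for the defs above: pcmpistr* use implicit-length (NUL-style)
// operands while pcmpestr* take the explicit lengths as the extra i32
// arguments; the m/i suffix selects a mask or index result, and a/c/o/s/z
// return single EFLAGS bits as i32. A substring-search sketch (equal-ordered
// mode, imm 12), assuming %a and %b are in scope:
//   %i = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a, <16 x i8> %b, i8 12)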
//===----------------------------------------------------------------------===//
// SSE4A
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_sse4a_movnt_ss : GCCBuiltin<"__builtin_ia32_movntss">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>;
def int_x86_sse4a_movnt_sd : GCCBuiltin<"__builtin_ia32_movntsd">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty], []>;
}
//===----------------------------------------------------------------------===//
// AVX
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Horizontal ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector permutation
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx_vpermilvar_pd_256 :
GCCBuiltin<"__builtin_ia32_vpermilvarpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx_vpermilvar_ps_256 :
GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx_vperm2f128_pd_256 :
GCCBuiltin<"__builtin_ia32_vperm2f128_pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vperm2f128_ps_256 :
GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vperm2f128_si_256 :
GCCBuiltin<"__builtin_ia32_vperm2f128_si256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector dot product
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
}
// Vector compare
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_cmp_pd_256 : GCCBuiltin<"__builtin_ia32_cmppd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_cmp_ps_256 : GCCBuiltin<"__builtin_ia32_cmpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector extract and insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_vextractf128_pd_256 :
GCCBuiltin<"__builtin_ia32_vextractf128_pd256">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vextractf128_ps_256 :
GCCBuiltin<"__builtin_ia32_vextractf128_ps256">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vextractf128_si_256 :
GCCBuiltin<"__builtin_ia32_vextractf128_si256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vinsertf128_pd_256 :
GCCBuiltin<"__builtin_ia32_vinsertf128_pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vinsertf128_ps_256 :
GCCBuiltin<"__builtin_ia32_vinsertf128_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vinsertf128_si_256 :
GCCBuiltin<"__builtin_ia32_vinsertf128_si256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector bit test
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
}
// Vector extract sign mask
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector zero
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">,
Intrinsic<[], [], []>;
def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">,
Intrinsic<[], [], []>;
}
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_vbroadcast_ss :
GCCBuiltin<"__builtin_ia32_vbroadcastss">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx_vbroadcast_sd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx_vbroadcast_ss_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx_vbroadcastf128_pd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx_vbroadcastf128_ps_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
}
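// IntrReadArgMem on the broadcast loads is a tighter contract than plain
// IntrReadMem: the call reads only through its pointer argument, so stores to
// memory provably distinct from it can move across the call. A sketch,
// assuming %p is in scope:
//   %v = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %p)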
// SIMD load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
// SIMD store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_storeu_pd_256 : GCCBuiltin<"__builtin_ia32_storeupd256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_storeu_ps_256 : GCCBuiltin<"__builtin_ia32_storeups256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_storeu_dq_256 : GCCBuiltin<"__builtin_ia32_storedqu256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], [IntrReadWriteArgMem]>;
}
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty],
[IntrReadArgMem]>;
def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty],
[IntrReadArgMem]>;
def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty],
[IntrReadArgMem]>;
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
[IntrReadArgMem]>;
}
// Conditional store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_maskstore_pd_256 :
GCCBuiltin<"__builtin_ia32_maskstorepd256">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_maskstore_ps_256 :
GCCBuiltin<"__builtin_ia32_maskstoreps256">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
}
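// For both directions the extra FP vector operand is the mask: only lanes
// whose element sign bit is set are transferred, masked-off load lanes read
// as zero, and masked-off lanes never fault. A paired sketch, assuming %p,
// %q and a mask %m are in scope:
//   %v = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %p, <2 x double> %m)
//   call void @llvm.x86.avx.maskstore.pd(i8* %q, <2 x double> %m, <2 x double> %v)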
//===----------------------------------------------------------------------===//
// AVX2
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
}
// Vector min, max
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
}
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
}
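// Three count conventions coexist above: psll/psrl/psra read the count from
// the low 64 bits of a 128-bit vector, the *i forms take an i32 immediate,
// and the dq forms shift whole 128-bit lanes, counted in bits for psll_dq
// and in bytes for the _bs ("byteshift") variants. Immediate-form sketch:
//   %s = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a, i32 3)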
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
}
// Absolute value ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
}
// Horizontal arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
}
// Sign ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
}
// Packed multiply high with round and scale
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
}
// Vector sign and zero extend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_vbroadcast_ss_ps :
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx2_vbroadcast_sd_pd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx2_vbroadcast_ss_ps_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx2_vbroadcasti128 :
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx2_pbroadcastb_128 :
GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastb_256 :
GCCBuiltin<"__builtin_ia32_pbroadcastb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastw_128 :
GCCBuiltin<"__builtin_ia32_pbroadcastw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastw_256 :
GCCBuiltin<"__builtin_ia32_pbroadcastw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastd_128 :
GCCBuiltin<"__builtin_ia32_pbroadcastd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastd_256 :
GCCBuiltin<"__builtin_ia32_pbroadcastd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastq_128 :
GCCBuiltin<"__builtin_ia32_pbroadcastq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx2_pbroadcastq_256 :
GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
}
// Vector permutation
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector extract and insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
[IntrReadArgMem]>;
def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
[IntrReadArgMem]>;
def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
[IntrReadArgMem]>;
def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
[IntrReadArgMem]>;
}
// Conditional store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx2_maskstore_d_256 :
GCCBuiltin<"__builtin_ia32_maskstored256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx2_maskstore_q_256 :
GCCBuiltin<"__builtin_ia32_maskstoreq256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
[IntrReadWriteArgMem]>;
}
// Variable bit shift ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
}
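// Unlike the uniform-count shifts above, psllv/psrlv/psrav shift each element
// by its own count taken from the second vector; counts at or above the
// element width produce zero (all sign bits for psrav). A sketch, assuming
// %a and a count vector %n are in scope:
//   %r = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a, <4 x i32> %n)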
// Gather ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
}
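// A gather only ever reads memory addressed from its pointer operand, so
// IntrReadArgMem (reads argument-pointed memory only) describes it more
// precisely than the broader IntrReadMem it replaces here, and stores to
// unrelated memory can now be scheduled around these calls. The operands are
// the pass-through source, base pointer, index vector, mask, and scale; a
// sketch, assuming the SSA values are in scope:
//   %g = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %src,
//            i8* %base, <4 x i32> %idx, <2 x double> %mask, i8 8)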
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
llvm_i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
//===----------------------------------------------------------------------===//
// FMA3 and FMA4
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_ps_256 :
GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_pd_256 :
GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_ps_256 :
GCCBuiltin<"__builtin_ia32_vfmsubaddps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_pd_256 :
GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
}
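// A minimal usage sketch, assuming -mfma. _mm_fmadd_ps lowers through
// __builtin_ia32_vfmaddps to int_x86_fma_vfmadd_ps above; being IntrNoMem,
// the FMA intrinsics are pure value computations the optimizer may freely
// reorder and CSE.
#include <immintrin.h>

// Fused a*x + y with a single rounding step per lane.
__m128 fused_axpy(__m128 a, __m128 x, __m128 y) {
  return _mm_fmadd_ps(a, x, y);
}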
//===----------------------------------------------------------------------===//
// XOP
def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpermil2pd_256 :
GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpermil2ps_256 :
GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_xop_vpcmov :
GCCBuiltin<"__builtin_ia32_vpcmov">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpcmov_256 :
GCCBuiltin<"__builtin_ia32_vpcmov_256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbd :
GCCBuiltin<"__builtin_ia32_vphaddbd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbq :
GCCBuiltin<"__builtin_ia32_vphaddbq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbw :
GCCBuiltin<"__builtin_ia32_vphaddbw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphadddq :
GCCBuiltin<"__builtin_ia32_vphadddq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_xop_vphaddubd :
GCCBuiltin<"__builtin_ia32_vphaddubd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddubq :
GCCBuiltin<"__builtin_ia32_vphaddubq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddubw :
GCCBuiltin<"__builtin_ia32_vphaddubw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddudq :
GCCBuiltin<"__builtin_ia32_vphaddudq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_xop_vphadduwd :
GCCBuiltin<"__builtin_ia32_vphadduwd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphadduwq :
GCCBuiltin<"__builtin_ia32_vphadduwq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphaddwd :
GCCBuiltin<"__builtin_ia32_vphaddwd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphaddwq :
GCCBuiltin<"__builtin_ia32_vphaddwq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphsubbw :
GCCBuiltin<"__builtin_ia32_vphsubbw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphsubdq :
GCCBuiltin<"__builtin_ia32_vphsubdq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_xop_vphsubwd :
GCCBuiltin<"__builtin_ia32_vphsubwd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vpmacsdd :
GCCBuiltin<"__builtin_ia32_vpmacsdd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacsdqh :
GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacsdql :
GCCBuiltin<"__builtin_ia32_vpmacsdql">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacssdd :
GCCBuiltin<"__builtin_ia32_vpmacssdd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacssdqh :
GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacssdql :
GCCBuiltin<"__builtin_ia32_vpmacssdql">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacsswd :
GCCBuiltin<"__builtin_ia32_vpmacsswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacssww :
GCCBuiltin<"__builtin_ia32_vpmacssww">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacswd :
GCCBuiltin<"__builtin_ia32_vpmacswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpmacsww :
GCCBuiltin<"__builtin_ia32_vpmacsww">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_xop_vpmadcsswd :
GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpmadcswd :
GCCBuiltin<"__builtin_ia32_vpmadcswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpperm :
GCCBuiltin<"__builtin_ia32_vpperm">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpshab :
GCCBuiltin<"__builtin_ia32_vpshab">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpshad :
GCCBuiltin<"__builtin_ia32_vpshad">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpshaq :
GCCBuiltin<"__builtin_ia32_vpshaq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpshaw :
GCCBuiltin<"__builtin_ia32_vpshaw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_xop_vpshlb :
GCCBuiltin<"__builtin_ia32_vpshlb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpshld :
GCCBuiltin<"__builtin_ia32_vpshld">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpshlq :
GCCBuiltin<"__builtin_ia32_vpshlq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpshlw :
GCCBuiltin<"__builtin_ia32_vpshlw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
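// A minimal usage sketch, assuming an AMD XOP target (-mxop).
// _mm_roti_epi32 expands to __builtin_ia32_vprotdi, the immediate-count
// form of the int_x86_xop_vprotdi rotate above.
#include <x86intrin.h>

// Rotate each 32-bit lane left by 7 bits.
__m128i rotl32_by7(__m128i v) {
  return _mm_roti_epi32(v, 7);
}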
//===----------------------------------------------------------------------===//
// MMX
// Empty MMX state op.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">,
Intrinsic<[], [], []>;
def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">,
Intrinsic<[], [], []>;
}
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Addition
def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Subtraction
def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
// Multiplication
def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Bitwise operations
def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
// Averages
def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Maximum
def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Minimum
def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Packed sum of absolute differences
def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
}
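// A minimal usage sketch, assuming -mmmx. _mm_adds_pu8 maps through
// __builtin_ia32_paddusb to int_x86_mmx_paddus_b above, and _mm_empty
// issues EMMS (int_x86_mmx_emms) to release the shared x87/MMX register
// state afterwards. The helper name is illustrative.
#include <mmintrin.h>

// Per-byte unsigned saturating add of two buffers of __m64 packets.
void sat_add_u8(const __m64 *a, const __m64 *b, __m64 *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = _mm_adds_pu8(a[i], b[i]);
  _mm_empty(); // required before any subsequent x87 floating-point use
}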
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Shift left logical
def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
// Unpacking ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
// Integer comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// BMI
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}
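// A minimal usage sketch, assuming -mbmi2. _pdep_u32 maps to
// int_x86_bmi_pdep_32 above; like the rest of the BMI group it is
// IntrNoMem, a pure bit manipulation on register values.
#include <immintrin.h>

// Interleave the low 16 bits of x and y into a 2-D Morton code.
unsigned morton2d(unsigned x, unsigned y) {
  return _pdep_u32(x, 0x55555555u)   // x bits into even positions
       | _pdep_u32(y, 0xAAAAAAAAu);  // y bits into odd positions
}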
//===----------------------------------------------------------------------===//
// FS/GS Base
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
Intrinsic<[llvm_i32_ty], []>;
def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">,
Intrinsic<[llvm_i32_ty], []>;
def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
Intrinsic<[llvm_i64_ty], []>;
def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">,
Intrinsic<[llvm_i64_ty], []>;
def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
Intrinsic<[], [llvm_i32_ty]>;
def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">,
Intrinsic<[], [llvm_i32_ty]>;
def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
Intrinsic<[], [llvm_i64_ty]>;
def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
Intrinsic<[], [llvm_i64_ty]>;
}
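// A minimal usage sketch, assuming -mfsgsbase on a 64-bit OS that enables
// the feature. _readgsbase_u64 and _writegsbase_u64 wrap the rdgsbase /
// wrgsbase intrinsics above, which are deliberately not IntrNoMem so two
// reads cannot be merged across an intervening write.
#include <immintrin.h>

// Swap in a new GS base, returning the previous one.
unsigned long long swap_gs_base(unsigned long long newbase) {
  unsigned long long old = _readgsbase_u64();
  _writegsbase_u64(newbase);
  return old;
}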
//===----------------------------------------------------------------------===//
// Half float conversion
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
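// A minimal usage sketch, assuming F16C with AVX enabled (-mavx -mf16c).
// _mm_cvtps_ph/_mm_cvtph_ps use the __builtin_ia32_vcvtps2ph /
// __builtin_ia32_vcvtph2ps builtins above; the i32 immediate selects the
// rounding mode for the narrowing conversion.
#include <immintrin.h>

// Round-trip four floats through half precision.
__m128 round_trip_half(__m128 f) {
  __m128i h = _mm_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  return _mm_cvtph_ps(h);
}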
//===----------------------------------------------------------------------===//
// TBM
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// RDRAND intrinsics - Return a random value and whether it is valid.
// RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and
// whether it is valid.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// These are declared side-effecting so they don't get eliminated by CSE or
// LICM.
def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
def int_x86_rdseed_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
def int_x86_rdseed_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
}
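// A minimal usage sketch, assuming -mrdrnd. _rdrand32_step wraps the
// two-result int_x86_rdrand_32 above: the second result is the carry flag
// saying whether a valid value was delivered, which is why the intrinsic is
// side-effecting and a retry loop like this survives CSE/LICM.
#include <immintrin.h>

// Retry RDRAND a few times, as Intel recommends.
int random_u32(unsigned *out) {
  for (int tries = 0; tries < 10; ++tries)
    if (_rdrand32_step(out))
      return 1; // *out holds a fresh hardware random value
  return 0;     // entropy momentarily unavailable; caller must fall back
}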
//===----------------------------------------------------------------------===//
// RTM intrinsics. Transactional Memory support.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
Intrinsic<[], [], []>;
def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
Intrinsic<[], [llvm_i8_ty], [IntrNoReturn]>;
def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
Intrinsic<[llvm_i32_ty], [], []>;
}
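// A minimal usage sketch, assuming -mrtm. _xbegin/_xend map to
// int_x86_xbegin/int_x86_xend above; _xbegin returns _XBEGIN_STARTED when
// the transaction starts, or an abort status code after a rollback.
#include <immintrin.h>

// Try one transactional increment; report whether it committed.
int add_transactional(int *counter, int delta) {
  if (_xbegin() == _XBEGIN_STARTED) {
    *counter += delta; // executes transactionally
    _xend();
    return 1;
  }
  return 0; // aborted; caller should take a lock-based fallback path
}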
//===----------------------------------------------------------------------===//
// AVX512
// Mask ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Mask instructions
// 16-bit mask
def int_x86_kadd_v16i1 : GCCBuiltin<"__builtin_ia32_kaddw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>;
def int_x86_kand_v16i1 : GCCBuiltin<"__builtin_ia32_kandw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>;
def int_x86_kandn_v16i1 : GCCBuiltin<"__builtin_ia32_kandnw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>;
def int_x86_knot_v16i1 : GCCBuiltin<"__builtin_ia32_knotw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty], [IntrNoMem]>;
def int_x86_kor_v16i1 : GCCBuiltin<"__builtin_ia32_korw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>;
def int_x86_kxor_v16i1 : GCCBuiltin<"__builtin_ia32_kxorw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>;
def int_x86_kxnor_v16i1 : GCCBuiltin<"__builtin_ia32_kxnorw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>;
def int_x86_mask2int_v16i1 : GCCBuiltin<"__builtin_ia32_mask2intw">,
Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty], [IntrNoMem]>;
def int_x86_int2mask_v16i1 : GCCBuiltin<"__builtin_ia32_int2maskw">,
Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_x86_kunpck_v16i1 : GCCBuiltin<"__builtin_ia32_kunpckbw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty],
[IntrNoMem]>;
def int_x86_avx512_kortestz : GCCBuiltin<"__builtin_ia32_kortestz">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_kortestc : GCCBuiltin<"__builtin_ia32_kortestc">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
}
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx512_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvt_ps2dq_512 : GCCBuiltin<"__builtin_ia32_cvtps2dq512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtdq2_ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty], [IntrNoMem]>;
}
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_vbroadcast_ss_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastss512">,
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx512_vbroadcast_ss_ps_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_vbroadcast_sd_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx512_vbroadcast_sd_pd_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastd_512 :
GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastd_i32_512 :
Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastq_512 :
GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastq_i64_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
}
// Vector sign and zero extend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx512_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty],
[IntrNoMem]>;
def int_x86_avx512_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx512_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty],
[IntrNoMem]>;
}
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
llvm_v16f32_ty], [IntrNoMem]>;
def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
llvm_v8f64_ty], [IntrNoMem]>;
def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
llvm_v16f32_ty], [IntrNoMem]>;
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
llvm_v8f64_ty], [IntrNoMem]>;
def int_x86_avx512_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_pmaxu_q : GCCBuiltin<"__builtin_ia32_pmaxuq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
def int_x86_avx512_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_pmaxs_q : GCCBuiltin<"__builtin_ia32_pmaxsq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
def int_x86_avx512_pminu_d : GCCBuiltin<"__builtin_ia32_pminud512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_pminu_q : GCCBuiltin<"__builtin_ia32_pminuq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
def int_x86_avx512_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_pmins_q : GCCBuiltin<"__builtin_ia32_pminsq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
def int_x86_avx512_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtrndsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], [IntrNoMem]>;
def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>;
def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_ps_512 : GCCBuiltin<"__builtin_ia32_rcp28ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_pd_512 : GCCBuiltin<"__builtin_ia32_rcp28pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt28ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt28pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
}
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi512_byteshift">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi512_byteshift">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Gather and Scatter ops
let TargetPrefix = "x86" in {
def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gatherdpi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherqpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherqpi512">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
// Scatter
def int_x86_avx512_scatter_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterdpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f64_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scatterdps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_v16f32_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterqpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8f64_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterqps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8f32_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty,
llvm_v8i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpi512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpi512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scatterdpq512">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
[]>;
def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scatterdpi512">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
[]>;
def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterqpq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i32_ty],
[]>;
def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterqpi512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i32_ty,
llvm_i32_ty],
[]>;
}
// AVX-512 conflict detection
let TargetPrefix = "x86" in {
def int_x86_avx512_conflict_d_512 : GCCBuiltin<"__builtin_ia32_conflictd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty],
[]>;
def int_x86_avx512_conflict_d_mask_512 :
GCCBuiltin<"__builtin_ia32_mask_conflictd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i1_ty, llvm_v16i32_ty],
[]>;
def int_x86_avx512_conflict_d_maskz_512:
GCCBuiltin<"__builtin_ia32_maskz_conflictd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i1_ty, llvm_v16i32_ty],
[]>;
def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty],
[]>;
def int_x86_avx512_conflict_q_mask_512 :
GCCBuiltin<"__builtin_ia32_mask_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i1_ty, llvm_v8i64_ty],
[]>;
def int_x86_avx512_conflict_q_maskz_512:
GCCBuiltin<"__builtin_ia32_maskz_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty],
[]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_mskblendps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16i1_ty, llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_mskblend_pd_512 : GCCBuiltin<"__builtin_ia32_mskblendpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8i1_ty, llvm_v8f64_ty, llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_mskblend_d_512 : GCCBuiltin<"__builtin_ia32_mskblendd512">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i1_ty, llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mskblend_q_512 : GCCBuiltin<"__builtin_ia32_mskblendq512">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
}
// Misc.
let TargetPrefix = "x86" in {
def int_x86_avx512_cmpeq_pi_512 : GCCBuiltin<"__builtin_ia32_cmpeqpi512">,
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
def int_x86_avx512_and_pi : GCCBuiltin<"__builtin_ia32_andpi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// SHA intrinsics
let TargetPrefix = "x86" in {
def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sha1msg2 : GCCBuiltin<"__builtin_ia32_sha1msg2">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sha256rnds2 : GCCBuiltin<"__builtin_ia32_sha256rnds2">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_sha256msg1 : GCCBuiltin<"__builtin_ia32_sha256msg1">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sha256msg2 : GCCBuiltin<"__builtin_ia32_sha256msg2">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
}
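// A minimal usage sketch, assuming -msha. _mm_sha256rnds2_epu32 expands to
// __builtin_ia32_sha256rnds2 above; like the rest of the SHA group it is an
// IntrNoMem register-to-register operation.
#include <immintrin.h>

// Advance a SHA-256 state by two rounds; wk carries the pre-added message
// words and round constants in its low lanes.
__m128i sha256_two_rounds(__m128i state0, __m128i state1, __m128i wk) {
  return _mm_sha256rnds2_epu32(state0, state1, wk);
}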
Index: head/contrib/llvm/include/llvm/MC/MCAsmInfo.h
===================================================================
--- head/contrib/llvm/include/llvm/MC/MCAsmInfo.h (revision 265924)
+++ head/contrib/llvm/include/llvm/MC/MCAsmInfo.h (revision 265925)
@@ -1,545 +1,558 @@
//===-- llvm/MC/MCAsmInfo.h - Asm info --------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a class to be used as the basis for target specific
// asm writers. This class primarily takes care of global printing constants,
// which are used in very similar ways across all targets.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCASMINFO_H
#define LLVM_MC_MCASMINFO_H
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MachineLocation.h"
#include <cassert>
#include <vector>
namespace llvm {
class MCExpr;
class MCSection;
class MCStreamer;
class MCSymbol;
class MCContext;
namespace ExceptionHandling {
enum ExceptionsType { None, DwarfCFI, SjLj, ARM, Win64 };
}
namespace LCOMM {
enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
}
/// MCAsmInfo - This class is intended to be used as a base class for asm
/// properties and features specific to the target.
class MCAsmInfo {
protected:
//===------------------------------------------------------------------===//
// Properties to be set by the target writer, used to configure asm printer.
//
/// PointerSize - Pointer size in bytes.
/// Default is 4.
unsigned PointerSize;
/// CalleeSaveStackSlotSize - Size of the stack slot reserved for
/// callee-saved registers, in bytes.
/// Default is same as pointer size.
unsigned CalleeSaveStackSlotSize;
/// IsLittleEndian - True if target is little endian.
/// Default is true.
bool IsLittleEndian;
/// StackGrowsUp - True if the target's stack grows up.
/// Default is false.
bool StackGrowsUp;
/// HasSubsectionsViaSymbols - True if this target has the MachO
/// .subsections_via_symbols directive.
bool HasSubsectionsViaSymbols; // Default is false.
/// HasMachoZeroFillDirective - True if this is a MachO target that supports
/// the MachO-specific .zerofill directive for emitting BSS symbols.
bool HasMachoZeroFillDirective; // Default is false.
/// HasMachoTBSSDirective - True if this is a MachO target that supports
/// the MachO-specific .tbss directive for emitting thread-local BSS symbols.
bool HasMachoTBSSDirective; // Default is false.
/// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should
/// emit a ".reference .constructors_used" or ".reference .destructors_used"
/// directive after the static ctor/dtor list. This directive is only
/// emitted in the static relocation model.
bool HasStaticCtorDtorReferenceInStaticMode; // Default is false.
/// LinkerRequiresNonEmptyDwarfLines - True if the linker has a bug and
/// requires that the debug_line section be of a minimum size. In practice
/// such a linker requires a non-empty line sequence if a file is present.
bool LinkerRequiresNonEmptyDwarfLines; // Default to false.
/// MaxInstLength - This is the maximum possible length of an instruction,
/// which is needed to compute the size of an inline asm.
unsigned MaxInstLength; // Defaults to 4.
/// MinInstAlignment - Every possible instruction length is a multiple of
/// this value. Factored out in .debug_frame and .debug_line.
unsigned MinInstAlignment; // Defaults to 1.
/// DollarIsPC - The '$' token, when not referencing an identifier or
/// constant, refers to the current PC.
bool DollarIsPC; // Defaults to false.
/// SeparatorString - This string, if specified, is used to separate
/// instructions from each other when on the same line.
const char *SeparatorString; // Defaults to ';'
/// CommentColumn - This indicates the column number (zero-based) at
/// which asm comments should be printed.
unsigned CommentColumn; // Defaults to 40
/// CommentString - This indicates the comment character used by the
/// assembler.
const char *CommentString; // Defaults to "#"
/// LabelSuffix - This is appended to emitted labels.
const char *LabelSuffix; // Defaults to ":"
/// DebugLabelSuffix - This is appended to emitted debug labels.
const char *DebugLabelSuffix; // Defaults to ":"
/// GlobalPrefix - If this is set to a non-empty string, it is prepended
/// onto all global symbols. This is often used for "_" or ".".
const char *GlobalPrefix; // Defaults to ""
/// PrivateGlobalPrefix - This prefix is used for globals like constant
/// pool entries that are completely private to the .s file and should not
/// have names in the .o file. This is often "." or "L".
const char *PrivateGlobalPrefix; // Defaults to "."
/// LinkerPrivateGlobalPrefix - This prefix is used for symbols that should
/// be passed through the assembler but be removed by the linker. This
/// is "l" on Darwin, currently used for some ObjC metadata.
const char *LinkerPrivateGlobalPrefix; // Defaults to ""
/// InlineAsmStart/End - If these are nonempty, they contain a directive to
/// emit before and after an inline assembly statement.
const char *InlineAsmStart; // Defaults to "#APP\n"
const char *InlineAsmEnd; // Defaults to "#NO_APP\n"
/// Code16Directive, Code32Directive, Code64Directive - These are assembly
/// directives that tell the assembler to interpret the following
/// instructions differently.
const char *Code16Directive; // Defaults to ".code16"
const char *Code32Directive; // Defaults to ".code32"
const char *Code64Directive; // Defaults to ".code64"
/// AssemblerDialect - Which dialect of an assembler variant to use.
unsigned AssemblerDialect; // Defaults to 0
/// \brief This is true if the assembler allows @ characters in symbol
/// names. Defaults to false.
bool AllowAtInName;
/// UseDataRegionDirectives - This is true if data region markers should
/// be printed as ".data_region/.end_data_region" directives. If false,
/// use "$d/$a" labels instead.
bool UseDataRegionDirectives;
//===--- Data Emission Directives -------------------------------------===//
/// ZeroDirective - This should be set to the directive used to get some
/// number of zero bytes emitted to the current section. Common cases are
/// "\t.zero\t" and "\t.space\t". If this is set to null, the
/// Data*bitsDirectives will be used to emit zero bytes.
const char *ZeroDirective; // Defaults to "\t.zero\t"
/// AsciiDirective - This directive allows emission of an ascii string with
/// the standard C escape characters embedded into it.
const char *AsciiDirective; // Defaults to "\t.ascii\t"
/// AscizDirective - If not null, this allows for special handling of
/// zero terminated strings on this target. This is commonly supported as
/// ".asciz". If a target doesn't support this, it can be set to null.
const char *AscizDirective; // Defaults to "\t.asciz\t"
/// DataDirectives - These directives are used to output some unit of
/// integer data to the current section. If a data directive is set to
/// null, smaller data directives will be used to emit the large sizes.
const char *Data8bitsDirective; // Defaults to "\t.byte\t"
const char *Data16bitsDirective; // Defaults to "\t.short\t"
const char *Data32bitsDirective; // Defaults to "\t.long\t"
const char *Data64bitsDirective; // Defaults to "\t.quad\t"
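// (For illustration only: per the fallback rule above, with a null
// Data64bitsDirective a 64-bit value would be emitted as two 32-bit
// pieces via Data32bitsDirective.)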
/// GPRel64Directive - if non-null, a directive that is used to emit a word
/// which should be relocated as a 64-bit GP-relative offset, e.g. .gpdword
/// on Mips.
const char *GPRel64Directive; // Defaults to NULL.
/// GPRel32Directive - if non-null, a directive that is used to emit a word
/// which should be relocated as a 32-bit GP-relative offset, e.g. .gpword
/// on Mips or .gprel32 on Alpha.
const char *GPRel32Directive; // Defaults to NULL.
/// SunStyleELFSectionSwitchSyntax - This is true if this target uses "Sun
/// Style" syntax for section switching ("#alloc,#write" etc) instead of the
/// normal ELF syntax (,"a,w") in .section directives.
bool SunStyleELFSectionSwitchSyntax; // Defaults to false.
/// UsesELFSectionDirectiveForBSS - This is true if this target uses the ELF
/// '.section' directive before the '.bss' one. It's used for PPC/Linux,
/// which doesn't support the bare '.bss' directive.
bool UsesELFSectionDirectiveForBSS; // Defaults to false.
/// HasMicrosoftFastStdCallMangling - True if this target uses
/// Microsoft-style mangling for functions with X86_StdCall/X86_FastCall
/// calling convention.
bool HasMicrosoftFastStdCallMangling; // Defaults to false.
bool NeedsDwarfSectionOffsetDirective;
//===--- Alignment Information ----------------------------------------===//
/// AlignDirective - The directive used to emit round up to an alignment
/// boundary.
///
const char *AlignDirective; // Defaults to "\t.align\t"
/// AlignmentIsInBytes - If this is true (the default) then the asmprinter
/// emits ".align N" directives, where N is the number of bytes to align to.
/// Otherwise, it emits ".align log2(N)", e.g. 3 to align to an 8 byte
/// boundary.
bool AlignmentIsInBytes; // Defaults to true
/// TextAlignFillValue - If non-zero, this is used to fill the executable
/// space created as the result of an alignment directive.
unsigned TextAlignFillValue; // Defaults to 0
//===--- Global Variable Emission Directives --------------------------===//
/// GlobalDirective - This is the directive used to declare a global entity.
///
const char *GlobalDirective; // Defaults to NULL.
/// HasSetDirective - True if the assembler supports the .set directive.
bool HasSetDirective; // Defaults to true.
/// HasAggressiveSymbolFolding - False if the assembler requires that we use
/// Lc = a - b
/// .long Lc
/// instead of
/// .long a - b
bool HasAggressiveSymbolFolding; // Defaults to true.
/// COMMDirectiveAlignmentIsInBytes - True if .comm's and .lcomm's optional
/// alignment is to be specified in bytes instead of log2(n).
bool COMMDirectiveAlignmentIsInBytes; // Defaults to true.
/// LCOMMDirectiveAlignment - Describes if the .lcomm directive for the
/// target supports an alignment argument and how it is interpreted.
LCOMM::LCOMMType LCOMMDirectiveAlignmentType; // Defaults to NoAlignment.
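// (For illustration only: given ".lcomm sym,8,4", ByteAlignment interprets
// the 4 as a 4-byte alignment, while Log2Alignment interprets it as
// 2^4 = 16 bytes.)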
/// HasDotTypeDotSizeDirective - True if the target has .type and .size
/// directives; this is true for most ELF targets.
bool HasDotTypeDotSizeDirective; // Defaults to true.
/// HasSingleParameterDotFile - True if the target has a single parameter
/// .file directive; this is true for ELF targets.
bool HasSingleParameterDotFile; // Defaults to true.
/// HasIdentDirective - True if the target has a .ident directive; this is
/// true for ELF targets.
bool HasIdentDirective; // Defaults to false.
/// HasNoDeadStrip - True if this target supports the MachO .no_dead_strip
/// directive.
bool HasNoDeadStrip; // Defaults to false.
/// WeakRefDirective - This directive, if non-null, is used to declare a
/// global as being a weak undefined symbol.
const char *WeakRefDirective; // Defaults to NULL.
- /// WeakDefDirective - This directive, if non-null, is used to declare a
- /// global as being a weak defined symbol.
- const char *WeakDefDirective; // Defaults to NULL.
+ /// True if we have a directive to declare a global as being a weak
+ /// defined symbol.
+ bool HasWeakDefDirective; // Defaults to false.
- /// LinkOnceDirective - This directive, if non-null is used to declare a
- /// global as being a weak defined symbol. This is used on cygwin/mingw.
- const char *LinkOnceDirective; // Defaults to NULL.
+ /// True if we have a directive to declare a global as being a weak
+ /// defined symbol that can be hidden (unexported).
+ bool HasWeakDefCanBeHiddenDirective; // Defaults to false.
+ /// True if we have a .linkonce directive. This is used on cygwin/mingw.
+ bool HasLinkOnceDirective; // Defaults to false.
+
/// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to
/// declare a symbol as having hidden visibility.
MCSymbolAttr HiddenVisibilityAttr; // Defaults to MCSA_Hidden.
/// HiddenDeclarationVisibilityAttr - This attribute, if not MCSA_Invalid,
/// is used to declare an undefined symbol as having hidden visibility.
MCSymbolAttr HiddenDeclarationVisibilityAttr; // Defaults to MCSA_Hidden.
/// ProtectedVisibilityAttr - This attribute, if not MCSA_Invalid, is used
/// to declare a symbol as having protected visibility.
MCSymbolAttr ProtectedVisibilityAttr; // Defaults to MCSA_Protected
//===--- Dwarf Emission Directives -----------------------------------===//
/// HasLEB128 - True if target asm supports leb128 directives.
bool HasLEB128; // Defaults to false.
/// SupportsDebugInformation - True if target supports emission of debugging
/// information.
bool SupportsDebugInformation; // Defaults to false.
/// SupportsExceptionHandling - True if target supports exception handling.
ExceptionHandling::ExceptionsType ExceptionsType; // Defaults to None
/// DwarfUsesRelocationsAcrossSections - True if Dwarf2 output generally
/// uses relocations for references to other .debug_* sections.
bool DwarfUsesRelocationsAcrossSections;
+ /// DwarfFDESymbolsUseAbsDiff - true if DWARF FDE symbol reference
+ /// relocations should be replaced by an absolute difference.
+ bool DwarfFDESymbolsUseAbsDiff;
+
/// DwarfRegNumForCFI - True if dwarf register numbers are printed
/// instead of symbolic register names in .cfi_* directives.
bool DwarfRegNumForCFI; // Defaults to false;
//===--- Prologue State ----------------------------------------------===//
std::vector<MCCFIInstruction> InitialFrameState;
public:
explicit MCAsmInfo();
virtual ~MCAsmInfo();
// FIXME: move these methods to DwarfPrinter when the JIT stops using them.
static unsigned getSLEB128Size(int64_t Value);
static unsigned getULEB128Size(uint64_t Value);
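// A minimal sketch of the ULEB128 size computation (assumed semantics,
// not the in-tree implementation): the value is stored in 7-bit groups,
// one byte per group:
//   unsigned Size = 0;
//   do { Value >>= 7; ++Size; } while (Value != 0);
//   return Size;
// getSLEB128Size is analogous, but must count groups until the remaining
// bits are all copies of the sign bit.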
/// getPointerSize - Get the pointer size in bytes.
unsigned getPointerSize() const {
return PointerSize;
}
/// getCalleeSaveStackSlotSize - Get the callee-saved register stack slot
/// size in bytes.
unsigned getCalleeSaveStackSlotSize() const {
return CalleeSaveStackSlotSize;
}
/// isLittleEndian - True if the target is little endian.
bool isLittleEndian() const {
return IsLittleEndian;
}
/// isStackGrowthDirectionUp - True if the target's stack grows up.
bool isStackGrowthDirectionUp() const {
return StackGrowsUp;
}
bool hasSubsectionsViaSymbols() const { return HasSubsectionsViaSymbols; }
// Data directive accessors.
//
const char *getData8bitsDirective() const {
return Data8bitsDirective;
}
const char *getData16bitsDirective() const {
return Data16bitsDirective;
}
const char *getData32bitsDirective() const {
return Data32bitsDirective;
}
const char *getData64bitsDirective() const {
return Data64bitsDirective;
}
const char *getGPRel64Directive() const { return GPRel64Directive; }
const char *getGPRel32Directive() const { return GPRel32Directive; }
/// getNonexecutableStackSection - Targets can implement this method to
/// specify a section to switch to if the translation unit doesn't have any
/// trampolines that require an executable stack.
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const {
return 0;
}
virtual const MCExpr *
getExprForPersonalitySymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const;
virtual const MCExpr *
getExprForFDESymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const;
bool usesSunStyleELFSectionSwitchSyntax() const {
return SunStyleELFSectionSwitchSyntax;
}
bool usesELFSectionDirectiveForBSS() const {
return UsesELFSectionDirectiveForBSS;
}
bool hasMicrosoftFastStdCallMangling() const {
return HasMicrosoftFastStdCallMangling;
}
bool needsDwarfSectionOffsetDirective() const {
return NeedsDwarfSectionOffsetDirective;
}
// Accessors.
//
bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
bool hasStaticCtorDtorReferenceInStaticMode() const {
return HasStaticCtorDtorReferenceInStaticMode;
}
bool getLinkerRequiresNonEmptyDwarfLines() const {
return LinkerRequiresNonEmptyDwarfLines;
}
unsigned getMaxInstLength() const {
return MaxInstLength;
}
unsigned getMinInstAlignment() const {
return MinInstAlignment;
}
bool getDollarIsPC() const {
return DollarIsPC;
}
const char *getSeparatorString() const {
return SeparatorString;
}
unsigned getCommentColumn() const {
return CommentColumn;
}
const char *getCommentString() const {
return CommentString;
}
const char *getLabelSuffix() const {
return LabelSuffix;
}
const char *getDebugLabelSuffix() const {
return DebugLabelSuffix;
}
const char *getGlobalPrefix() const {
return GlobalPrefix;
}
const char *getPrivateGlobalPrefix() const {
return PrivateGlobalPrefix;
}
const char *getLinkerPrivateGlobalPrefix() const {
return LinkerPrivateGlobalPrefix;
}
const char *getInlineAsmStart() const {
return InlineAsmStart;
}
const char *getInlineAsmEnd() const {
return InlineAsmEnd;
}
const char *getCode16Directive() const {
return Code16Directive;
}
const char *getCode32Directive() const {
return Code32Directive;
}
const char *getCode64Directive() const {
return Code64Directive;
}
unsigned getAssemblerDialect() const {
return AssemblerDialect;
}
bool doesAllowAtInName() const {
return AllowAtInName;
}
bool doesSupportDataRegionDirectives() const {
return UseDataRegionDirectives;
}
const char *getZeroDirective() const {
return ZeroDirective;
}
const char *getAsciiDirective() const {
return AsciiDirective;
}
const char *getAscizDirective() const {
return AscizDirective;
}
const char *getAlignDirective() const {
return AlignDirective;
}
bool getAlignmentIsInBytes() const {
return AlignmentIsInBytes;
}
unsigned getTextAlignFillValue() const {
return TextAlignFillValue;
}
const char *getGlobalDirective() const {
return GlobalDirective;
}
bool hasSetDirective() const { return HasSetDirective; }
bool hasAggressiveSymbolFolding() const {
return HasAggressiveSymbolFolding;
}
bool getCOMMDirectiveAlignmentIsInBytes() const {
return COMMDirectiveAlignmentIsInBytes;
}
LCOMM::LCOMMType getLCOMMDirectiveAlignmentType() const {
return LCOMMDirectiveAlignmentType;
}
bool hasDotTypeDotSizeDirective() const {return HasDotTypeDotSizeDirective;}
bool hasSingleParameterDotFile() const { return HasSingleParameterDotFile; }
bool hasIdentDirective() const { return HasIdentDirective; }
bool hasNoDeadStrip() const { return HasNoDeadStrip; }
const char *getWeakRefDirective() const { return WeakRefDirective; }
- const char *getWeakDefDirective() const { return WeakDefDirective; }
- const char *getLinkOnceDirective() const { return LinkOnceDirective; }
+ bool hasWeakDefDirective() const { return HasWeakDefDirective; }
+ bool hasWeakDefCanBeHiddenDirective() const {
+ return HasWeakDefCanBeHiddenDirective;
+ }
+ bool hasLinkOnceDirective() const { return HasLinkOnceDirective; }
MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;}
MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
return HiddenDeclarationVisibilityAttr;
}
MCSymbolAttr getProtectedVisibilityAttr() const {
return ProtectedVisibilityAttr;
}
bool hasLEB128() const {
return HasLEB128;
}
bool doesSupportDebugInformation() const {
return SupportsDebugInformation;
}
bool doesSupportExceptionHandling() const {
return ExceptionsType != ExceptionHandling::None;
}
ExceptionHandling::ExceptionsType getExceptionHandlingType() const {
return ExceptionsType;
}
bool isExceptionHandlingDwarf() const {
return
(ExceptionsType == ExceptionHandling::DwarfCFI ||
ExceptionsType == ExceptionHandling::ARM ||
ExceptionsType == ExceptionHandling::Win64);
}
bool doesDwarfUseRelocationsAcrossSections() const {
return DwarfUsesRelocationsAcrossSections;
+ }
+ bool doDwarfFDESymbolsUseAbsDiff() const {
+ return DwarfFDESymbolsUseAbsDiff;
}
bool useDwarfRegNumForCFI() const {
return DwarfRegNumForCFI;
}
void addInitialFrameState(const MCCFIInstruction &Inst) {
InitialFrameState.push_back(Inst);
}
const std::vector<MCCFIInstruction> &getInitialFrameState() const {
return InitialFrameState;
}
};
}
#endif
Index: head/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- head/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp (revision 265924)
+++ head/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp (revision 265925)
@@ -1,1309 +1,1399 @@
//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the primary stateless implementation of the
// Alias Analysis interface that implements identities (two different
// globals cannot alias, etc), but does no stateful analysis.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
+/// Cutoff after which to stop analysing a set of phi nodes potentially involved
+/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// careful with value equivalence. We use reachability to make sure a value
+/// cannot be involved in a cycle.
+const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
+
//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//
/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
/// object that never escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
// If this is a local allocation, check to see if it escapes.
if (isa<AllocaInst>(V) || isNoAliasCall(V))
// Set StoreCaptures to True so that we can assume in our callers that the
// pointer is not the result of a load instruction. Currently
// PointerMayBeCaptured doesn't have any special analysis for the
// StoreCaptures=false case; if it did, our callers could be refined to be
// more precise.
return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
// If this is an argument that corresponds to a byval or noalias argument,
// then it has not escaped before entering the function. Check if it escapes
// inside the function.
if (const Argument *A = dyn_cast<Argument>(V))
if (A->hasByValAttr() || A->hasNoAliasAttr())
// Note even if the argument is marked nocapture we still need to check
// for copies made inside the function. The nocapture attribute only
// specifies that there are no copies made that outlive the function.
return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
return false;
}
/// isEscapeSource - Return true if the pointer is one which would have
/// been considered an escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
return true;
// The load case works because isNonEscapingLocalObject considers all
// stores to be escapes (it passes true for the StoreCaptures argument
// to PointerMayBeCaptured).
if (isa<LoadInst>(V))
return true;
return false;
}
/// getObjectSize - Return the size of the object specified by V, or
/// UnknownSize if unknown.
static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const DataLayout &TD,
const TargetLibraryInfo &TLI) {
// Note that the meanings of the "object" are slightly different in the
// following contexts:
// c1: llvm::getObjectSize()
// c2: llvm.objectsize() intrinsic
// c3: isObjectSmallerThan()
// c1 and c2 share the same meaning; however, the meaning of "object" in c3
// refers to the "entire object".
//
// Consider this example:
// char *p = (char*)malloc(100)
// char *q = p+80;
//
// In the context of c1 and c2, the "object" pointed by q refers to the
// stretch of memory of q[0:19]. So, getObjectSize(q) should return 20.
//
// However, in the context of c3, the "object" refers to the chunk of memory
// being allocated. So, the "object" has 100 bytes, and q points to the middle
// the "object". In case q is passed to isObjectSmallerThan() as the 1st
// parameter, before the llvm::getObjectSize() is called to get the size of
// entire object, we should:
// - either rewind the pointer q to the base-address of the object in
// question (in this case rewind to p), or
// - just give up. It is up to the caller to make sure the pointer is
// pointing to the base address of the object.
//
// We go with the 2nd option for simplicity.
if (!isIdentifiedObject(V))
return false;
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
/// isObjectSize - Return true if we can prove that the object specified
/// by V has size Size.
static bool isObjectSize(const Value *V, uint64_t Size,
const DataLayout &TD, const TargetLibraryInfo &TLI) {
uint64_t ObjectSize = getObjectSize(V, TD, TLI);
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
}
/// isIdentifiedFunctionLocal - Return true if V is unambiguously identified
/// at the function-level. Different IdentifiedFunctionLocals can't alias.
/// Further, an IdentifiedFunctionLocal cannot alias with any function
/// arguments other than itself, which is not necessarily true for
/// IdentifiedObjects.
static bool isIdentifiedFunctionLocal(const Value *V)
{
return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
}
//===----------------------------------------------------------------------===//
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
namespace {
enum ExtensionKind {
EK_NotExtended,
EK_SignExt,
EK_ZeroExt
};
struct VariableGEPIndex {
const Value *V;
ExtensionKind Extension;
int64_t Scale;
bool operator==(const VariableGEPIndex &Other) const {
return V == Other.V && Extension == Other.Extension &&
Scale == Other.Scale;
}
bool operator!=(const VariableGEPIndex &Other) const {
return !operator==(Other);
}
};
}
/// GetLinearExpression - Analyze the specified value as a linear expression:
/// "A*V + B", where A and B are constant integers. Return the scale and offset
/// values as APInts and return V as a Value*, and return whether we looked
/// through any sign or zero extends. The incoming Value is known to have
/// IntegerType and it may already be sign or zero extended.
///
/// Note that this looks through extends, so the high bits may not be
/// represented in the result.
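///
/// For example (illustrative IR, not from the source): analyzing
///   %a = shl i32 %x, 2
///   %b = add i32 %a, 12
/// starting from %b yields V = %x, Scale = 4 and Offset = 12, i.e.
/// %b = 4*%x + 12.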
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
ExtensionKind &Extension,
const DataLayout &TD, unsigned Depth) {
assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
if (Depth == 6) {
Scale = 1;
Offset = 0;
return V;
}
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
default: break;
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
break;
// FALL THROUGH.
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
TD, Depth+1);
Offset += RHSC->getValue();
return V;
case Instruction::Mul:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
TD, Depth+1);
Offset *= RHSC->getValue();
Scale *= RHSC->getValue();
return V;
case Instruction::Shl:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
TD, Depth+1);
Offset <<= RHSC->getValue().getLimitedValue();
Scale <<= RHSC->getValue().getLimitedValue();
return V;
}
}
}
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
(isa<ZExtInst>(V) && Extension != EK_SignExt)) {
Value *CastOp = cast<CastInst>(V)->getOperand(0);
unsigned OldWidth = Scale.getBitWidth();
unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
Scale = Scale.trunc(SmallWidth);
Offset = Offset.trunc(SmallWidth);
Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
TD, Depth+1);
Scale = Scale.zext(OldWidth);
Offset = Offset.zext(OldWidth);
return Result;
}
Scale = 1;
Offset = 0;
return V;
}
/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
/// into a base pointer with a constant offset and a number of scaled symbolic
/// offsets.
///
/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
/// the VarIndices vector) are Value*'s that are known to be scaled by the
/// specified amount, but which may have other unrepresented high bits. As such,
/// the gep cannot necessarily be reconstructed from its decomposed form.
///
/// When DataLayout is around, this function is capable of analyzing everything
/// that GetUnderlyingObject can look through. When not, it just looks
/// through pointer casts.
///
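/// As an illustration (hypothetical IR, not from the source), with
/// DataLayout available:
///   %p = getelementptr [16 x i32]* %A, i64 0, i64 %i
/// decomposes into base pointer %A, BaseOffs = 0 and a single variable
/// index { V = %i, EK_NotExtended, Scale = 4 }, 4 being the allocation
/// size of i32.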
static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
const DataLayout *TD) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = 6;
BaseOffs = 0;
do {
// See if this is a bitcast or GEP.
const Operator *Op = dyn_cast<Operator>(V);
if (Op == 0) {
// The only non-operator case we can handle are GlobalAliases.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->mayBeOverridden()) {
V = GA->getAliasee();
continue;
}
}
return V;
}
if (Op->getOpcode() == Instruction::BitCast) {
V = Op->getOperand(0);
continue;
}
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (GEPOp == 0) {
// If it's not a GEP, hand it off to SimplifyInstruction to see if it
// can come up with something. This matches what GetUnderlyingObject does.
if (const Instruction *I = dyn_cast<Instruction>(V))
// TODO: Get a DominatorTree and use it here.
if (const Value *Simplified =
SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
V = Simplified;
continue;
}
return V;
}
// Don't attempt to analyze GEPs over unsized objects.
if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
// If we are lacking DataLayout information, we can't compute the offsets of
// elements computed by GEPs. However, we can handle bitcast equivalent
// GEPs.
if (TD == 0) {
if (!GEPOp->hasAllZeroIndices())
return V;
V = GEPOp->getOperand(0);
continue;
}
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
E = GEPOp->op_end(); I != E; ++I) {
Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
continue;
}
uint64_t Scale = TD->getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = Index->getType()->getIntegerBitWidth();
if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
*TD, 0);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
BaseOffs += IndexOffset.getSExtValue()*Scale;
Scale *= IndexScale.getSExtValue();
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
if (VarIndices[i].V == Index &&
VarIndices[i].Extension == Extension) {
Scale += VarIndices[i].Scale;
VarIndices.erase(VarIndices.begin()+i);
break;
}
}
// Make sure that we have a scale that makes sense for this target's
// pointer size.
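// (Illustrative case: on a 32-bit target ShiftBits is 32, so a Scale of
// 0xFFFFFFFF becomes -1 after the shift pair, i.e. the scale is sign
// extended from the pointer width into the 64-bit Scale.)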
if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
if (Scale) {
VariableGEPIndex Entry = {Index, Extension,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
}
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
} while (--MaxLookup);
// If the chain of expressions is too deep, just return early.
return V;
}
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src) {
- if (Src.empty()) return;
-
- for (unsigned i = 0, e = Src.size(); i != e; ++i) {
- const Value *V = Src[i].V;
- ExtensionKind Extension = Src[i].Extension;
- int64_t Scale = Src[i].Scale;
-
- // Find V in Dest. This is N^2, but pointer indices almost never have more
- // than a few variable indexes.
- for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
- if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
- // If we found it, subtract off Scale V's from the entry in Dest. If it
- // goes to zero, remove the entry.
- if (Dest[j].Scale != Scale)
- Dest[j].Scale -= Scale;
- else
- Dest.erase(Dest.begin()+j);
- Scale = 0;
- break;
- }
-
- // If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale) {
- VariableGEPIndex Entry = { V, Extension, -Scale };
- Dest.push_back(Entry);
- }
- }
-}
-
//===----------------------------------------------------------------------===//
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
#ifndef NDEBUG
static const Function *getParent(const Value *V) {
if (const Instruction *inst = dyn_cast<Instruction>(V))
return inst->getParent()->getParent();
if (const Argument *arg = dyn_cast<Argument>(V))
return arg->getParent();
return NULL;
}
static bool notDifferentParent(const Value *O1, const Value *O2) {
const Function *F1 = getParent(O1);
const Function *F2 = getParent(O2);
return !F1 || !F2 || F1 == F2;
}
#endif
namespace {
/// BasicAliasAnalysis - This is the primary alias analysis implementation.
struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
static char ID; // Class identification, replacement for typeinfo
BasicAliasAnalysis() : ImmutablePass(ID) {
initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
virtual void initializePass() {
InitializeAliasAnalysis(this);
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetLibraryInfo>();
}
virtual AliasResult alias(const Location &LocA,
const Location &LocB) {
assert(AliasCache.empty() && "AliasCache must be cleared after use!");
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
LocB.Ptr, LocB.Size, LocB.TBAATag);
// AliasCache rarely has more than 1 or 2 elements, always use
// shrink_and_clear so it quickly returns to the inline capacity of the
// SmallDenseMap if it ever grows larger.
// FIXME: This should really be shrink_to_inline_capacity_and_clear().
AliasCache.shrink_and_clear();
+ VisitedPhiBBs.clear();
return Alias;
}
virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
const Location &Loc);
virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) {
// The AliasAnalysis base class has some smarts, let's use them.
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
/// pointsToConstantMemory - Chase pointers until we find a (constant)
/// global, or give up.
virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
/// getModRefBehavior - Return the behavior when calling the given
/// call site.
virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
/// getModRefBehavior - Return the behavior when calling the given function.
/// For use when the call site is not known.
virtual ModRefBehavior getModRefBehavior(const Function *F);
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
virtual void *getAdjustedAnalysisPointer(const void *ID) {
if (ID == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
private:
// AliasCache - Track alias queries to guard against recursion.
typedef std::pair<Location, Location> LocPair;
typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
AliasCacheTy AliasCache;
+ /// \brief Track phi nodes we have visited. When interpret "Value" pointer
+ /// equality as value equality we need to make sure that the "Value" is not
+ /// part of a cycle. Otherwise, two uses could come from different
+ /// "iterations" of a cycle and see different values for the same "Value"
+ /// pointer.
+ /// The following example shows the problem:
+ /// %p = phi(%alloca1, %addr2)
+ /// %l = load %ptr
+ /// %addr1 = gep %alloca2, 0, %l
+ /// %addr2 = gep %alloca2, 0, (%l + 1)
+ /// alias(%p, %addr1) -> MayAlias !
+ /// store %l, ...
+ SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
+
// Visited - Track instructions visited by pointsToConstantMemory.
SmallPtrSet<const Value*, 16> Visited;
+ /// \brief Check whether two Values can be considered equivalent.
+ ///
+ /// In addition to pointer equivalence of \p V1 and \p V2 this checks
+ /// whether they cannot be part of a cycle in the value graph by looking at
+ /// all visited phi nodes and making sure that the phis cannot reach the
+ /// value. We have to do this because we are looking through phi nodes (that
+ /// is, we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+ bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
+
+ /// \brief Dest and Src are the variable indices from two decomposed
+ /// GetElementPtr instructions GEP1 and GEP2 which have common base
+ /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+ /// difference between the two pointers.
+ void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src);
+
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
const MDNode *V1TBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2);
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
// instruction against another.
AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
const MDNode *PNTBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo);
/// aliasSelect - Disambiguate a Select instruction against another value.
AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
const MDNode *SITBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo);
AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
const MDNode *V1TBAATag,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAATag);
};
} // End of anonymous namespace
// Register this pass...
char BasicAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
ImmutablePass *llvm::createBasicAliasAnalysisPass() {
return new BasicAliasAnalysis();
}
/// pointsToConstantMemory - Returns whether the given pointer value
/// points to memory that is local to the function, with global constants being
/// considered local to all functions.
bool
BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
assert(Visited.empty() && "Visited must be cleared after use!");
unsigned MaxLookup = 8;
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
if (!Visited.insert(V)) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
// An alloca instruction defines local memory.
if (OrLocal && isa<AllocaInst>(V))
continue;
// A global constant counts as local memory for our purposes.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
// Note: this doesn't require GV to be "ODR" because it isn't legal for a
// global to be marked constant in some modules and non-constant in
// others. GV may even be a declaration, not a definition.
if (!GV->isConstant()) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
continue;
}
// If both select values point to local memory, then so does the select.
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
Worklist.push_back(SI->getTrueValue());
Worklist.push_back(SI->getFalseValue());
continue;
}
// If all values incoming to a phi node point to local memory, then so does
// the phi.
if (const PHINode *PN = dyn_cast<PHINode>(V)) {
// Don't bother inspecting phi nodes with many operands.
if (PN->getNumIncomingValues() > MaxLookup) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
Worklist.push_back(PN->getIncomingValue(i));
continue;
}
// Otherwise be conservative.
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
} while (!Worklist.empty() && --MaxLookup);
Visited.clear();
return Worklist.empty();
}
/// getModRefBehavior - Return the behavior when calling the given call site.
AliasAnalysis::ModRefBehavior
BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
if (CS.doesNotAccessMemory())
// Can't do better than this.
return DoesNotAccessMemory;
ModRefBehavior Min = UnknownModRefBehavior;
// If the callsite knows it only reads memory, don't return worse
// than that.
if (CS.onlyReadsMemory())
Min = OnlyReadsMemory;
// The AliasAnalysis base class has some smarts, let's use them.
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}
/// getModRefBehavior - Return the behavior when calling the given function.
/// For use when the call site is not known.
AliasAnalysis::ModRefBehavior
BasicAliasAnalysis::getModRefBehavior(const Function *F) {
// If the function declares it doesn't access memory, we can't do better.
if (F->doesNotAccessMemory())
return DoesNotAccessMemory;
// For intrinsics, we can check the table.
if (unsigned iid = F->getIntrinsicID()) {
#define GET_INTRINSIC_MODREF_BEHAVIOR
#include "llvm/IR/Intrinsics.gen"
#undef GET_INTRINSIC_MODREF_BEHAVIOR
}
ModRefBehavior Min = UnknownModRefBehavior;
// If the function declares it only reads memory, go with that.
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
// Otherwise be conservative.
return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
}
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
const Location &Loc) {
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
// We cannot exclude byval arguments here; these belong to the caller of
// the current function not to the current function, and a tail callee
// may reference them.
if (isa<AllocaInst>(Object))
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
// If the pointer is to a locally allocated object that does not escape,
// then the call cannot mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
isNonEscapingLocalObject(Object)) {
bool PassedAsArg = false;
unsigned ArgNo = 0;
for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
(!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
if (!isNoAlias(Location(*CI), Location(Object))) {
PassedAsArg = true;
break;
}
}
if (!PassedAsArg)
return NoModRef;
}
const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
ModRefResult Min = ModRef;
// Finally, handle specific knowledge of intrinsics.
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
if (II != 0)
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::memcpy:
case Intrinsic::memmove: {
uint64_t Len = UnknownSize;
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
Len = LenCI->getZExtValue();
Value *Dest = II->getArgOperand(0);
Value *Src = II->getArgOperand(1);
// If it can't overlap the source or the dest, then it doesn't modref the loc.
if (isNoAlias(Location(Dest, Len), Loc)) {
if (isNoAlias(Location(Src, Len), Loc))
return NoModRef;
// If it can't overlap the dest, then worst case it reads the loc.
Min = Ref;
} else if (isNoAlias(Location(Src, Len), Loc)) {
// If it can't overlap the source, then worst case it mutates the loc.
Min = Mod;
}
break;
}
case Intrinsic::memset:
// Since memset is 'accesses arguments' only, the AliasAnalysis base class
// will handle it for the variable length case.
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
uint64_t Len = LenCI->getZExtValue();
Value *Dest = II->getArgOperand(0);
if (isNoAlias(Location(Dest, Len), Loc))
return NoModRef;
}
// We know that memset doesn't load anything.
Min = Mod;
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start: {
uint64_t PtrSize =
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
if (isNoAlias(Location(II->getArgOperand(1),
PtrSize,
II->getMetadata(LLVMContext::MD_tbaa)),
Loc))
return NoModRef;
break;
}
case Intrinsic::invariant_end: {
uint64_t PtrSize =
cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
if (isNoAlias(Location(II->getArgOperand(2),
PtrSize,
II->getMetadata(LLVMContext::MD_tbaa)),
Loc))
return NoModRef;
break;
}
case Intrinsic::arm_neon_vld1: {
// LLVM's vld1 and vst1 intrinsics currently only support a single
// vector register.
uint64_t Size =
TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
if (isNoAlias(Location(II->getArgOperand(0), Size,
II->getMetadata(LLVMContext::MD_tbaa)),
Loc))
return NoModRef;
break;
}
case Intrinsic::arm_neon_vst1: {
uint64_t Size =
TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
if (isNoAlias(Location(II->getArgOperand(0), Size,
II->getMetadata(LLVMContext::MD_tbaa)),
Loc))
return NoModRef;
break;
}
}
// We can bound the aliasing properties of memset_pattern16 just as we can
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
else if (TLI.has(LibFunc::memset_pattern16) &&
CS.getCalledFunction() &&
CS.getCalledFunction()->getName() == "memset_pattern16") {
const Function *MS = CS.getCalledFunction();
FunctionType *MemsetType = MS->getFunctionType();
if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
isa<PointerType>(MemsetType->getParamType(0)) &&
isa<PointerType>(MemsetType->getParamType(1)) &&
isa<IntegerType>(MemsetType->getParamType(2))) {
uint64_t Len = UnknownSize;
if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
Len = LenCI->getZExtValue();
const Value *Dest = CS.getArgument(0);
const Value *Src = CS.getArgument(1);
// If it can't overlap the source or the dest, then it doesn't modref the loc.
if (isNoAlias(Location(Dest, Len), Loc)) {
// Always reads 16 bytes of the source.
if (isNoAlias(Location(Src, 16), Loc))
return NoModRef;
// If it can't overlap the dest, then worst case it reads the loc.
Min = Ref;
// Always reads 16 bytes of the source.
} else if (isNoAlias(Location(Src, 16), Loc)) {
// If it can't overlap the source, then worst case it mutates the loc.
Min = Mod;
}
}
}
// The AliasAnalysis base class has some smarts, let's use them.
return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
}
static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
SmallVectorImpl<VariableGEPIndex> &Indices2) {
unsigned Size1 = Indices1.size();
unsigned Size2 = Indices2.size();
if (Size1 != Size2)
return false;
for (unsigned I = 0; I != Size1; ++I)
if (Indices1[I] != Indices2[I])
return false;
return true;
}
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
/// UnderlyingV2 is the same for V2.
///
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const MDNode *V1TBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo,
const Value *UnderlyingV1,
const Value *UnderlyingV2) {
int64_t GEP1BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
// If we have two gep instructions with must-alias or not-alias'ing base
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
UnderlyingV2, UnknownSize, 0);
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
V1TBAAInfo, UnderlyingV2,
V2Size, V2TBAAInfo);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
int64_t GEP2BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
assert(TD == 0 &&
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// Same offsets.
if (GEP1BaseOffset == GEP2BaseOffset &&
areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices))
return NoAlias;
GEP1VariableIndices.clear();
}
}
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
if (BaseAlias != MustAlias) return BaseAlias;
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
int64_t GEP2BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
assert(TD == 0 &&
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
} else {
// Check to see if these two pointers are related by the getelementptr
// instruction. If one pointer is a GEP with a non-zero index of the other
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
V2, V2Size, V2TBAAInfo);
if (R != MustAlias)
// If V2 may alias the GEP base pointer, conservatively return MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
// cannot alias per GEP semantics: "A pointer value formed from a
// getelementptr instruction is associated with the addresses associated
// with the first operand of the getelementptr".
return R;
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1) {
assert(TD == 0 &&
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
}
// In the two-GEP case, if there is no difference in the offsets of the
// computed pointers, the resultant pointers are a must alias. This
// happens when we have two lexically identical GEPs (for example).
//
// In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
// must-aliases the GEP, the end result is a must alias also.
if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
return MustAlias;
// If there is a constant difference between the pointers, but the difference
// is less than the size of the associated memory object, then we know
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
if (GEP1BaseOffset >= 0) {
if (V2Size != UnknownSize) {
if ((uint64_t)GEP1BaseOffset < V2Size)
return PartialAlias;
return NoAlias;
}
} else {
- if (V1Size != UnknownSize) {
+ // We have the situation where:
+ // + +
+ // | BaseOffset |
+ // ---------------->|
+ // |-->V1Size |-------> V2Size
+ // GEP1 V2
+ // We need to know that V2Size is not unknown, otherwise we might have
+ // stripped a gep with a negative index ('gep <ptr>, -1, ...').
+ if (V1Size != UnknownSize && V2Size != UnknownSize) {
if (-(uint64_t)GEP1BaseOffset < V1Size)
return PartialAlias;
return NoAlias;
}
}
}
// Try to distinguish something like &A[i][1] against &A[42][0].
// Grab the least significant bit set in any of the scales.
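// (Worked example, for illustration: scales {4, 6} OR together to 0b110;
// Modulo & (Modulo - 1) clears the lowest set bit, giving 0b100, and the
// XOR leaves 0b010, so Modulo = 2 and the difference of the two pointers
// is a multiple of 2 plus the constant offset.)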
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
Modulo = Modulo ^ (Modulo & (Modulo - 1));
// We can compute the difference between the two addresses
// mod Modulo. Check whether that difference guarantees that the
// two locations do not alias.
uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
if (V1Size != UnknownSize && V2Size != UnknownSize &&
ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
return NoAlias;
}
// Statically, we can see that the base objects are the same, but the
// pointers have dynamic offsets which we can't resolve. And none of our
// little tricks above worked.
//
// TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
// practical effect of this is protecting TBAA in the case of dynamic
// indices into arrays of unions or malloc'd memory.
return PartialAlias;
}
static AliasAnalysis::AliasResult
MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
// If the results agree, take it.
if (A == B)
return A;
// A mix of PartialAlias and MustAlias is PartialAlias.
if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
(B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
return AliasAnalysis::PartialAlias;
// Otherwise, we don't know anything.
return AliasAnalysis::MayAlias;
}
/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
/// instruction against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
const MDNode *SITBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
AliasResult Alias =
aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
SI2->getTrueValue(), V2Size, V2TBAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
SI2->getFalseValue(), V2Size, V2TBAAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then this
// returns NoAlias / MustAlias. Otherwise, this returns MayAlias.
AliasResult Alias =
aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
// against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
const MDNode *PNTBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
+ // Track phi nodes we have visited. We use this information when we determine
+ // value equivalence.
+ VisitedPhiBBs.insert(PN->getParent());
+
// If the values are PHIs in the same block, we can do a more precise
// as well as efficient check: just check for aliases between the values
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
LocPair Locs(Location(PN, PNSize, PNTBAAInfo),
Location(V2, V2Size, V2TBAAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
// Analyse the PHIs' inputs under the assumption that the PHIs are
// NoAlias.
// If the PHIs are May/MustAlias there must be (recursively) an input
// operand from outside the PHIs' cycle that is MayAlias/MustAlias or
// there must be an operation on the PHIs within the PHIs' value cycle
// that causes a MayAlias.
// Pretend the phis do not alias.
AliasResult Alias = NoAlias;
assert(AliasCache.count(Locs) &&
"There must exist an entry for the phi node");
AliasResult OrigAliasResult = AliasCache[Locs];
AliasCache[Locs] = NoAlias;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
V2Size, V2TBAAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
}
// Reset if speculation failed.
if (Alias != NoAlias)
AliasCache[Locs] = OrigAliasResult;
return Alias;
}
SmallPtrSet<Value*, 4> UniqueSrc;
SmallVector<Value*, 4> V1Srcs;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *PV1 = PN->getIncomingValue(i);
if (isa<PHINode>(PV1))
// If any of the sources is itself a PHI, return MayAlias conservatively
// to avoid compile time explosion. The worst possible case is if both
// sides are PHI nodes, in which case this is O(m x n) time, where 'm'
// and 'n' are the number of PHI sources.
return MayAlias;
if (UniqueSrc.insert(PV1))
V1Srcs.push_back(PV1);
}
AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
V1Srcs[0], PNSize, PNTBAAInfo);
// Early exit if the check of the first PHI source against V2 is MayAlias;
// no more precise overall result is then possible.
if (Alias == MayAlias)
return MayAlias;
// If all sources of the PHI node NoAlias or MustAlias V2, then return
// NoAlias / MustAlias. Otherwise, return MayAlias.
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
V, PNSize, PNTBAAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
}
return Alias;
}
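// A hypothetical sketch of the same-block fast path above:
//
//   bb:
//     %p = phi i32* [ %a, %entry ], [ %c, %latch ]
//     %q = phi i32* [ %b, %entry ], [ %d, %latch ]
//
// Only the edge-wise pairs (%a, %b) and (%c, %d) are compared, since on any
// given execution of bb both PHIs select values from the same incoming edge.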
// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
// such as array references.
//
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
const MDNode *V1TBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
return NoAlias;
// Strip off any casts if they exist.
V1 = V1->stripPointerCasts();
V2 = V2->stripPointerCasts();
// Are we checking for alias of the same value?
- if (V1 == V2) return MustAlias;
+ // Because we look 'through' phi nodes we could look at "Value" pointers from
+ // different iterations. We must therefore make sure that this is not the
+ // case. The function isValueEqualInPotentialCycles ensures that this cannot
+ // happen by looking at the visited phi nodes and making sure they cannot
+ // reach the value.
+ if (isValueEqualInPotentialCycles(V1, V2))
+ return MustAlias;
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
const Value *O1 = GetUnderlyingObject(V1, TD);
const Value *O2 = GetUnderlyingObject(V2, TD);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
if (CPN->getType()->getAddressSpace() == 0)
return NoAlias;
if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
if (CPN->getType()->getAddressSpace() == 0)
return NoAlias;
if (O1 != O2) {
// If V1/V2 point to two different objects we know that we have no alias.
if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
return NoAlias;
// Constant pointers can't alias with non-const isIdentifiedObject objects.
if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
(isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
return NoAlias;
// Function arguments can't alias with things that are known to be
// unambiguously identified at the function level.
if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) ||
(isa<Argument>(O2) && isIdentifiedFunctionLocal(O1)))
return NoAlias;
// Most objects can't alias null.
if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
(isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
return NoAlias;
// If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object within the same function, then we know the
// object couldn't escape to a point where the call could return it.
//
// Note that if the pointers are in different functions, there are a
// variety of complications. A call with a nocapture argument may still
// temporarily store the nocapture argument's value in a temporary memory
// location if that memory location doesn't escape. Or it may pass a
// nocapture value to other functions as long as they don't capture it.
if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
return NoAlias;
if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
return NoAlias;
}
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
if (TD)
if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
(V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
Location(V2, V2Size, V2TBAAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
std::pair<AliasCacheTy::iterator, bool> Pair =
AliasCache.insert(std::make_pair(Locs, MayAlias));
if (!Pair.second)
return Pair.first->second;
// FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
// GEP can't simplify, we don't even look at the PHI cases.
if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
std::swap(O1, O2);
std::swap(V1TBAAInfo, V2TBAAInfo);
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
std::swap(V1TBAAInfo, V2TBAAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
V2, V2Size, V2TBAAInfo);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
std::swap(V1TBAAInfo, V2TBAAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
V2, V2Size, V2TBAAInfo);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
// If both pointers are pointing into the same object and one of the
// accesses covers the entire object, then the accesses must
// overlap in some way.
if (TD && O1 == O2)
if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) ||
(V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI)))
return AliasCache[Locs] = PartialAlias;
AliasResult Result =
AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
Location(V2, V2Size, V2TBAAInfo));
return AliasCache[Locs] = Result;
+}
+
+bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
+ const Value *V2) {
+ if (V != V2)
+ return false;
+
+ const Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
+ return false;
+
+ // Use dominance or loop info if available.
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+
+ // Make sure that the visited phis cannot reach the Value. This ensures that
+ // the Values cannot come from different iterations of a potential cycle the
+ // phi nodes could be involved in.
+ for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
+ PE = VisitedPhiBBs.end();
+ PI != PE; ++PI)
+ if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
+ return false;
+
+ return true;
+}
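+// A minimal sketch (hypothetical IR) of the hazard this check guards
+// against:
+//
+//   loop:
+//     %p = phi i8* [ %base, %entry ], [ %p.next, %loop ]
+//     %p.next = getelementptr i8* %p, i64 1
+//
+// While looking through the phi, the same Value %p.next can stand for
+// pointers from two different iterations, which are one byte apart and so
+// are not MustAlias. Because %loop is in VisitedPhiBBs and %p.next is
+// reachable from the phi, we conservatively return false for such values.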
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
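+///
+/// As a small worked example (with %i and %j as placeholder index values):
+/// given Dest = { (%i, sext, 4) } and Src = { (%i, sext, 4), (%j, zext, 2) },
+/// the matching %i entries have equal scales and cancel, so the entry is
+/// erased; the unmatched %j entry is appended negated, leaving
+/// Dest = { (%j, zext, -2) }.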
+void BasicAliasAnalysis::GetIndexDifference(
+ SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty())
+ return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
+ Dest[j].Extension != Extension)
+ continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin() + j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
}
Index: head/contrib/llvm/lib/Analysis/IVUsers.cpp
===================================================================
--- head/contrib/llvm/lib/Analysis/IVUsers.cpp (revision 265924)
+++ head/contrib/llvm/lib/Analysis/IVUsers.cpp (revision 265925)
@@ -1,335 +1,354 @@
//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements bookkeeping for "interesting" users of expressions
// computed from induction variables.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "iv-users"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(IVUsers, "iv-users",
"Induction Variable Users", false, true)
Pass *llvm::createIVUsersPass() {
return new IVUsers();
}
/// isInteresting - Test whether the given expression is "interesting" when
/// used by the given instruction, within the context of analyzing the
/// given loop.
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
ScalarEvolution *SE, LoopInfo *LI) {
// An addrec is interesting if it's affine or if it has an interesting start.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Keep things simple. Don't touch loop-variant strides unless they're
// only used outside the loop and we can simplify them.
if (AR->getLoop() == L)
return AR->isAffine() ||
(!L->contains(I) &&
SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
// Otherwise recurse to see if the start value is interesting, and that
// the step value is not interesting, since we don't yet know how to
// do effective SCEV expansions for addrecs with interesting steps.
return isInteresting(AR->getStart(), I, L, SE, LI) &&
!isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
}
// An add is interesting if exactly one of its operands is interesting.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
bool AnyInterestingYet = false;
for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
OI != OE; ++OI)
if (isInteresting(*OI, I, L, SE, LI)) {
if (AnyInterestingYet)
return false;
AnyInterestingYet = true;
}
return AnyInterestingYet;
}
// Nothing else is interesting here.
return false;
}
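// For example (hypothetical SCEVs for a loop L): the affine addrec
// {0,+,4}<L> is interesting; ((4 * %n) + {0,+,4}<L>) is interesting because
// exactly one add operand is; and (%a + %b) is not, since neither unknown
// is interesting on its own.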
/// Return true if all loop headers that dominate this block are in simplified
/// form.
static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
const LoopInfo *LI,
SmallPtrSet<Loop*,16> &SimpleLoopNests) {
Loop *NearestLoop = 0;
for (DomTreeNode *Rung = DT->getNode(BB);
Rung; Rung = Rung->getIDom()) {
BasicBlock *DomBB = Rung->getBlock();
Loop *DomLoop = LI->getLoopFor(DomBB);
if (DomLoop && DomLoop->getHeader() == DomBB) {
// If the domtree walk reaches a loop with no preheader, return false.
if (!DomLoop->isLoopSimplifyForm())
return false;
// If we have already checked this loop nest, stop checking.
if (SimpleLoopNests.count(DomLoop))
break;
// If we have not already checked this loop nest, remember the loop
// header nearest to BB. The nearest loop may not contain BB.
if (!NearestLoop)
NearestLoop = DomLoop;
}
}
if (NearestLoop)
SimpleLoopNests.insert(NearestLoop);
return true;
}
/// AddUsersImpl - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
bool IVUsers::AddUsersImpl(Instruction *I,
SmallPtrSet<Loop*,16> &SimpleLoopNests) {
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
if (!Processed.insert(I))
return true; // Instruction already handled.
if (!SE->isSCEVable(I->getType()))
return false; // Void and FP expressions cannot be reduced.
// IVUsers is used by LSR which assumes that all SCEV expressions are safe to
// pass to SCEVExpander. Expressions are not safe to expand if they represent
// operations that are not safe to speculate, namely integer division.
if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, TD))
return false;
// LSR is not APInt clean; do not touch integers bigger than 64 bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
return false;
// Get the symbolic expression for this instruction.
const SCEV *ISE = SE->getSCEV(I);
// If we've come to an uninteresting expression, stop the traversal and
// call this a user.
if (!isInteresting(ISE, I, L, SE, LI))
return false;
SmallPtrSet<Instruction *, 4> UniqueUsers;
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (!UniqueUsers.insert(User))
continue;
// Do not infinitely recurse on PHI nodes.
if (isa<PHINode>(User) && Processed.count(User))
continue;
// Only consider IVUsers that are dominated by simplified loop
// headers. Otherwise, SCEVExpander will crash.
BasicBlock *UseBB = User->getParent();
// A phi's use is live out of its predecessor block.
if (PHINode *PHI = dyn_cast<PHINode>(User)) {
unsigned OperandNo = UI.getOperandNo();
unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
UseBB = PHI->getIncomingBlock(ValNo);
}
if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
return false;
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to correctly
// make choices that depend on addressing-mode use, although we won't
// consider references outside the loop in all cases.
// If User is already in Processed, we don't want to recurse into it again,
// but do want to record a second reference in the same instruction.
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
!AddUsersImpl(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
} else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
if (AddUserToIVUsers) {
// Okay, we found a user that we cannot reduce.
- IVUses.push_back(new IVStrideUse(this, User, I));
- IVStrideUse &NewUse = IVUses.back();
+ IVStrideUse &NewUse = AddUser(User, I);
// Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
// The regular return value here is discarded; instead of recording
// it, we just recompute it when we need it.
+ const SCEV *OriginalISE = ISE;
ISE = TransformForPostIncUse(NormalizeAutodetect,
ISE, User, I,
NewUse.PostIncLoops,
*SE, *DT);
+
+ // PostIncNormalization effectively simplifies the expression under
+ // pre-increment assumptions. Those assumptions (no wrapping) might not
+ // hold for the post-inc value. Catch such cases by making sure the
+ // transformation is invertible.
+ if (OriginalISE != ISE) {
+ const SCEV *DenormalizedISE =
+ TransformForPostIncUse(Denormalize, ISE, User, I,
+ NewUse.PostIncLoops, *SE, *DT);
+
+ // If we normalized the expression, but denormalization doesn't give the
+ // original one, discard this user.
+ if (OriginalISE != DenormalizedISE) {
+ DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+ << *ISE << '\n');
+ IVUses.pop_back();
+ return false;
+ }
+ }
DEBUG(if (SE->getSCEV(I) != ISE)
dbgs() << " NORMALIZED TO: " << *ISE << '\n');
}
}
return true;
}
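// A sketch of the (de)normalization round-trip guarded above, under the
// usual post-increment interpretation: for a user of %iv.next with SCEV
// {1,+,1}<L>, NormalizeAutodetect can rewrite the expression to the
// pre-increment form {0,+,1}<L>, recording L in NewUse.PostIncLoops;
// Denormalize must then map {0,+,1}<L> back to {1,+,1}<L>, or the user is
// discarded as shown.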
bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// SCEVExpander can only handle users that are dominated by simplified loop
// entries. Keep track of all loops that are only dominated by other simple
// loops so we don't traverse the domtree for each user.
SmallPtrSet<Loop*,16> SimpleLoopNests;
return AddUsersImpl(I, SimpleLoopNests);
}
IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
}
IVUsers::IVUsers()
: LoopPass(ID) {
initializeIVUsersPass(*PassRegistry::getPassRegistry());
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.setPreservesAll();
}
bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
L = l;
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
TD = getAnalysisIfAvailable<DataLayout>();
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
(void)AddUsersIfInteresting(I);
return false;
}
void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << "IV Users for loop ";
WriteAsOperand(OS, L->getHeader(), false);
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
OS << " with backedge-taken count "
<< *SE->getBackedgeTakenCount(L);
}
OS << ":\n";
for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
E = IVUses.end(); UI != E; ++UI) {
OS << " ";
WriteAsOperand(OS, UI->getOperandValToReplace(), false);
OS << " = " << *getReplacementExpr(*UI);
for (PostIncLoopSet::const_iterator
I = UI->PostIncLoops.begin(),
E = UI->PostIncLoops.end(); I != E; ++I) {
OS << " (post-inc with loop ";
WriteAsOperand(OS, (*I)->getHeader(), false);
OS << ")";
}
OS << " in ";
UI->getUser()->print(OS);
OS << '\n';
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void IVUsers::dump() const {
print(dbgs());
}
#endif
void IVUsers::releaseMemory() {
Processed.clear();
IVUses.clear();
}
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace.
const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
return SE->getSCEV(IU.getOperandValToReplace());
}
/// getExpr - Return the expression for the use.
const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
return
TransformForPostIncUse(Normalize, getReplacementExpr(IU),
IU.getUser(), IU.getOperandValToReplace(),
const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
*SE, *DT);
}
static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (AR->getLoop() == L)
return AR;
return findAddRecForLoop(AR->getStart(), L);
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
return AR;
return 0;
}
return 0;
}
const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
return AR->getStepRecurrence(*SE);
return 0;
}
void IVStrideUse::transformToPostInc(const Loop *L) {
PostIncLoops.insert(L);
}
void IVStrideUse::deleted() {
// Remove this user from the list.
Parent->Processed.erase(this->getUser());
Parent->IVUses.erase(this);
// this now dangles!
}
Index: head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp (revision 265924)
+++ head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp (revision 265925)
@@ -1,7720 +1,7720 @@
//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library. First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (i.e., a PHI node). If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression. These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
// Chains of recurrences -- a method to expedite the evaluation
// of closed-form functions
// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
// On computational properties of chains of recurrences
// Eugene V. Zima
//
// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
// Robert A. van Engelen
//
// Efficient Symbolic Analysis for Optimizing Compilers
// Robert A. van Engelen
//
// Using the chains of recurrences algebra for data dependence testing and
// induction variable substitution
// MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "scalar-evolution"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
STATISTIC(NumArrayLenItCounts,
"Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
"Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
"Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
"Number of loops with trip counts computed by force");
static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
cl::desc("Maximum number of iterations SCEV will "
"symbolically execute a constant "
"derived loop"),
cl::init(100));
// FIXME: Enable this with XDEBUG when the test suite is clean.
static cl::opt<bool>
VerifySCEV("verify-scev",
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
#endif
void SCEV::print(raw_ostream &OS) const {
switch (getSCEVType()) {
case scConstant:
WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
return;
case scTruncate: {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
const SCEV *Op = Trunc->getOperand();
OS << "(trunc " << *Op->getType() << " " << *Op << " to "
<< *Trunc->getType() << ")";
return;
}
case scZeroExtend: {
const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
const SCEV *Op = ZExt->getOperand();
OS << "(zext " << *Op->getType() << " " << *Op << " to "
<< *ZExt->getType() << ")";
return;
}
case scSignExtend: {
const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
const SCEV *Op = SExt->getOperand();
OS << "(sext " << *Op->getType() << " " << *Op << " to "
<< *SExt->getType() << ")";
return;
}
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
OS << "{" << *AR->getOperand(0);
for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
OS << ",+," << *AR->getOperand(i);
OS << "}<";
if (AR->getNoWrapFlags(FlagNUW))
OS << "nuw><";
if (AR->getNoWrapFlags(FlagNSW))
OS << "nsw><";
if (AR->getNoWrapFlags(FlagNW) &&
!AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
OS << "nw><";
WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
OS << ">";
return;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
const char *OpStr = 0;
switch (NAry->getSCEVType()) {
case scAddExpr: OpStr = " + "; break;
case scMulExpr: OpStr = " * "; break;
case scUMaxExpr: OpStr = " umax "; break;
case scSMaxExpr: OpStr = " smax "; break;
}
OS << "(";
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
OS << **I;
if (llvm::next(I) != E)
OS << OpStr;
}
OS << ")";
switch (NAry->getSCEVType()) {
case scAddExpr:
case scMulExpr:
if (NAry->getNoWrapFlags(FlagNUW))
OS << "<nuw>";
if (NAry->getNoWrapFlags(FlagNSW))
OS << "<nsw>";
}
return;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
return;
}
case scUnknown: {
const SCEVUnknown *U = cast<SCEVUnknown>(this);
Type *AllocTy;
if (U->isSizeOf(AllocTy)) {
OS << "sizeof(" << *AllocTy << ")";
return;
}
if (U->isAlignOf(AllocTy)) {
OS << "alignof(" << *AllocTy << ")";
return;
}
Type *CTy;
Constant *FieldNo;
if (U->isOffsetOf(CTy, FieldNo)) {
OS << "offsetof(" << *CTy << ", ";
WriteAsOperand(OS, FieldNo, false);
OS << ")";
return;
}
// Otherwise just print it normally.
WriteAsOperand(OS, U->getValue(), false);
return;
}
case scCouldNotCompute:
OS << "***COULDNOTCOMPUTE***";
return;
default: break;
}
llvm_unreachable("Unknown SCEV kind!");
}
Type *SCEV::getType() const {
switch (getSCEVType()) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
case scTruncate:
case scZeroExtend:
case scSignExtend:
return cast<SCEVCastExpr>(this)->getType();
case scAddRecExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
return cast<SCEVNAryExpr>(this)->getType();
case scAddExpr:
return cast<SCEVAddExpr>(this)->getType();
case scUDivExpr:
return cast<SCEVUDivExpr>(this)->getType();
case scUnknown:
return cast<SCEVUnknown>(this)->getType();
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
default:
llvm_unreachable("Unknown SCEV kind!");
}
}
bool SCEV::isZero() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isZero();
return false;
}
bool SCEV::isOne() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isOne();
return false;
}
bool SCEV::isAllOnesValue() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isAllOnesValue();
return false;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
bool SCEV::isNonConstantNegative() const {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
if (!Mul) return false;
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative; this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
}
const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
return getConstant(ConstantInt::get(getContext(), Val));
}
const SCEV *
ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
unsigned SCEVTy, const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate non-integer value!");
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot zero extend non-integer value!");
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot sign extend non-integer value!");
}
void SCEVUnknown::deleted() {
// Clear this SCEVUnknown from various maps.
SE->forgetMemoizedResults(this);
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
// Release the value.
setValPtr(0);
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
// Clear this SCEVUnknown from various maps.
SE->forgetMemoizedResults(this);
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
// Update this SCEVUnknown to point to the new value. This is needed
// because there may still be outstanding SCEVs which still point to
// this SCEVUnknown.
setValPtr(New);
}
bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue() &&
CE->getNumOperands() == 2)
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
if (CI->isOne()) {
AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
->getElementType();
return true;
}
return false;
}
bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked() &&
CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
if (CI->isOne() &&
STy->getNumElements() == 2 &&
STy->getElementType(0)->isIntegerTy(1)) {
AllocTy = STy->getElementType(1);
return true;
}
}
}
return false;
}
bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getNumOperands() == 3 &&
CE->getOperand(0)->isNullValue() &&
CE->getOperand(1)->isNullValue()) {
Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
if (Ty->isStructTy() || Ty->isArrayTy()) {
CTy = Ty;
FieldNo = CE->getOperand(2);
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
namespace {
/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
/// than the complexity of the RHS. This comparator is used to canonicalize
/// expressions.
class SCEVComplexityCompare {
const LoopInfo *const LI;
public:
explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
// Return true if LHS is less than RHS, and false otherwise (i.e., if LHS
// is at least RHS).
bool operator()(const SCEV *LHS, const SCEV *RHS) const {
return compare(LHS, RHS) < 0;
}
// Return negative, zero, or positive, if LHS is less than, equal to, or
// greater than RHS, respectively. A three-way result allows recursive
// comparisons to be more efficient.
int compare(const SCEV *LHS, const SCEV *RHS) const {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
return 0;
// Primarily, sort the SCEVs by their getSCEVType().
unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
if (LType != RType)
return (int)LType - (int)RType;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
switch (LType) {
case scUnknown: {
const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
// Sort SCEVUnknown values with some loose heuristics. TODO: This is
// not as complete as it could be.
const Value *LV = LU->getValue(), *RV = RU->getValue();
// Order pointer values after integer values. This helps SCEVExpander
// form GEPs.
bool LIsPointer = LV->getType()->isPointerTy(),
RIsPointer = RV->getType()->isPointerTy();
if (LIsPointer != RIsPointer)
return (int)LIsPointer - (int)RIsPointer;
// Compare getValueID values.
unsigned LID = LV->getValueID(),
RID = RV->getValueID();
if (LID != RID)
return (int)LID - (int)RID;
// Sort arguments by their position.
if (const Argument *LA = dyn_cast<Argument>(LV)) {
const Argument *RA = cast<Argument>(RV);
unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
return (int)LArgNo - (int)RArgNo;
}
// For instructions, compare their loop depth, and their operand
// count. This is pretty loose.
if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
const Instruction *RInst = cast<Instruction>(RV);
// Compare loop depths.
const BasicBlock *LParent = LInst->getParent(),
*RParent = RInst->getParent();
if (LParent != RParent) {
unsigned LDepth = LI->getLoopDepth(LParent),
RDepth = LI->getLoopDepth(RParent);
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
}
// Compare the number of operands.
unsigned LNumOps = LInst->getNumOperands(),
RNumOps = RInst->getNumOperands();
return (int)LNumOps - (int)RNumOps;
}
return 0;
}
case scConstant: {
const SCEVConstant *LC = cast<SCEVConstant>(LHS);
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
// Compare constant values.
const APInt &LA = LC->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
if (LBitWidth != RBitWidth)
return (int)LBitWidth - (int)RBitWidth;
return LA.ult(RA) ? -1 : 1;
}
case scAddRecExpr: {
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
// Compare addrec loop depths.
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
if (LLoop != RLoop) {
unsigned LDepth = LLoop->getLoopDepth(),
RDepth = RLoop->getLoopDepth();
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
}
// Addrec complexity grows with operand count.
unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
long X = compare(LA->getOperand(i), RA->getOperand(i));
if (X != 0)
return X;
}
return 0;
}
case scAddExpr:
case scMulExpr:
case scSMaxExpr:
case scUMaxExpr: {
const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
// Lexicographically compare n-ary expressions.
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
for (unsigned i = 0; i != LNumOps; ++i) {
if (i >= RNumOps)
return 1;
long X = compare(LC->getOperand(i), RC->getOperand(i));
if (X != 0)
return X;
}
return (int)LNumOps - (int)RNumOps;
}
case scUDivExpr: {
const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
// Lexicographically compare udiv expressions.
long X = compare(LC->getLHS(), RC->getLHS());
if (X != 0)
return X;
return compare(LC->getRHS(), RC->getRHS());
}
case scTruncate:
case scZeroExtend:
case scSignExtend: {
const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
// Compare cast expressions by operand.
return compare(LC->getOperand(), RC->getOperand());
}
default:
llvm_unreachable("Unknown SCEV kind!");
}
}
};
}
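// For illustration: given the operands of (%a + 42 + {0,+,1}<L>), the
// primary getSCEVType() key in this version's SCEVTypes order places the
// scConstant 42 before the scAddRecExpr {0,+,1}<L>, which in turn precedes
// the scUnknown %a, independently of where the uniqued SCEV objects happen
// to live in memory.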
/// GroupByComplexity - Given a list of SCEV objects, order them by their
/// complexity, and group objects of the same complexity together by value.
/// When this routine is finished, we know that any duplicates in the vector are
/// consecutive and that complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
if (SCEVComplexityCompare(LI)(RHS, LHS))
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
// be extremely short in practice. Note that we take this approach because we
// do not want to depend on the addresses of the objects we are grouping.
for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
const SCEV *S = Ops[i];
unsigned Complexity = S->getSCEVType();
// If there are any objects of the same complexity and same value as this
// one, group them.
for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
if (Ops[j] == S) { // Found a duplicate.
// Move it to immediately after i'th element.
std::swap(Ops[i+1], Ops[j]);
++i; // no need to rescan it.
if (i == e-2) return; // Done!
}
}
}
}
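// E.g. starting from Ops = [C, U1, U2, U1], where C is an scConstant and
// U1/U2 are distinct scUnknowns that compare equal under the loose
// heuristics, the stable sort may leave the duplicate U1 non-adjacent; the
// grouping pass above then swaps it next to the first U1, giving
// [C, U1, U1, U2], so callers can find duplicates by scanning neighbours.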
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
//===----------------------------------------------------------------------===//
/// BinomialCoefficient - Compute BC(It, K). The result has width W.
/// Assume K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
ScalarEvolution &SE,
Type *ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
// We are using the following formula for BC(It, K):
//
// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
//
// Suppose W is the bitwidth of the return value. We must be prepared for
// overflow. Hence, we must ensure that the result of our computation is
// equal to the accurate one modulo 2^W. Unfortunately, division isn't
// safe in modular arithmetic.
//
// However, this code doesn't use exactly that formula; the formula it uses
// is something like the following, where T is the number of factors of 2 in
// K! (i.e. trailing zeros in the binary representation of K!), and ^ is
// exponentiation:
//
// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
//
// This formula is trivially equivalent to the previous formula. However,
// this formula can be implemented much more efficiently. The trick is that
// K! / 2^T is odd, and exact division by an odd number *is* safe in modular
// arithmetic. To do exact division in modular arithmetic, all we have
// to do is multiply by the inverse. Therefore, this step can be done at
// width W.
//
// The next issue is how to safely do the division by 2^T. The way this
// is done is by doing the multiplication step at a width of at least W + T
// bits. This way, the bottom W+T bits of the product are accurate. Then,
// when we perform the division by 2^T (which is equivalent to a right shift
// by T), the bottom W bits are accurate. Extra bits are okay; they'll get
// truncated out after the division by 2^T.
//
// In comparison to just directly using the first formula, this technique
// is much more efficient; using the first formula requires W * K bits,
// but this formula needs less than W + K bits. Also, the first formula requires
// a division step, whereas this formula only requires multiplies and shifts.
//
// It doesn't matter whether the subtraction step is done in the calculation
// width or the input iteration count's width; if the subtraction overflows,
// the result must be zero anyway. We prefer here to do it in the width of
// the induction variable because it helps a lot for certain cases; CodeGen
// isn't smart enough to ignore the overflow, which leads to much less
// efficient code if the width of the subtraction is wider than the native
// register width.
//
// (It's possible to not widen at all by pulling out factors of 2 before
// the multiplication; for example, K=2 can be calculated as
// It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
// extra arithmetic, so it's not an obvious win, and it gets
// much more complicated for K > 3.)
// Protection from insane SCEVs; this bound is conservative,
// but it probably doesn't matter.
if (K > 1000)
return SE.getCouldNotCompute();
unsigned W = SE.getTypeSizeInBits(ResultTy);
// Calculate K! / 2^T and T; we divide out the factors of two before
// multiplying for calculating K! / 2^T to avoid overflow.
// Other overflow doesn't matter because we only care about the bottom
// W bits of the result.
APInt OddFactorial(W, 1);
unsigned T = 1;
for (unsigned i = 3; i <= K; ++i) {
APInt Mult(W, i);
unsigned TwoFactors = Mult.countTrailingZeros();
T += TwoFactors;
Mult = Mult.lshr(TwoFactors);
OddFactorial *= Mult;
}
// We need at least W + T bits for the multiplication step
unsigned CalculationBits = W + T;
// Calculate 2^T, at width T+W.
APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
// Calculate the multiplicative inverse of K! / 2^T;
// this multiplication factor will perform the exact division by
// K! / 2^T.
APInt Mod = APInt::getSignedMinValue(W+1);
APInt MultiplyFactor = OddFactorial.zext(W+1);
MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
CalculationBits);
const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
Dividend = SE.getMulExpr(Dividend,
SE.getTruncateOrZeroExtend(S, CalculationTy));
}
// Divide by 2^T
const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
// Truncate the result, and divide by K! / 2^T.
return SE.getMulExpr(SE.getConstant(MultiplyFactor),
SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}
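// A worked instance of the scheme above, for K = 3 and W = 32:
// 3! = 6 = 2^1 * 3, so T = 1 and OddFactorial = 3. The product
// It * (It - 1) * (It - 2) is computed at W + T = 33 bits, divided by
// 2^T = 2 via getUDivExpr, truncated back to 32 bits, and multiplied by the
// multiplicative inverse of 3 modulo 2^32 (0xAAAAAAAB), which performs the
// exact division by the remaining odd factor.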
/// evaluateAtIteration - Return the value of this chain of recurrences at
/// the specified iteration number. We can evaluate this recurrence by
/// multiplying each element in the chain by the binomial coefficient
/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
///
/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
///
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
ScalarEvolution &SE) const {
const SCEV *Result = getStart();
for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
// The computation is correct in the face of overflow provided that the
// multiplication is performed _after_ the evaluation of the binomial
// coefficient.
const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
if (isa<SCEVCouldNotCompute>(Coeff))
return Coeff;
Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
}
return Result;
}
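// Concretely, for an affine recurrence {A,+,B} the loop above adds
// B * BC(It, 1) = B * It to the start value, giving A + B * It; for
// {A,+,B,+,C} the quadratic term C * BC(It, 2) = C * It * (It - 1) / 2 is
// added as well.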
//===----------------------------------------------------------------------===//
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
FoldingSetNodeID ID;
ID.AddInteger(scTruncate);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
return getTruncateExpr(ST->getOperand(), Ty);
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getTruncateOrSignExtend(SS->getOperand(), Ty);
// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
// trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
// eliminate all the truncates.
if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
return getAddExpr(Operands);
UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
}
// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
// eliminate all the truncates.
if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
return getMulExpr(Operands);
UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
}
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
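// Two of the folds above, shown on hypothetical operands:
//
//   trunc i64 (zext i32 %x to i64) to i16  -->  trunc i32 %x to i16
//   trunc i64 (sext i16 %x to i64) to i32  -->  sext i16 %x to i32
//
// In the first case the zero extension is narrowed away entirely; in the
// second, truncating to a type still wider than the original source turns
// the whole cast into a sign extend.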
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
ID.AddInteger(scZeroExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
// It's possible the bits taken off by the truncate were all zero bits. If
// so, we should be able to simplify this further.
const SCEV *X = ST->getOperand();
ConstantRange CR = getUnsignedRange(X);
unsigned TruncBits = getTypeSizeInBits(ST->getType());
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
CR.zextOrTrunc(NewBits)))
return getTruncateOrZeroExtend(X, Ty);
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
// operands (often constants). This allows analysis of something like
// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNUW))
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
// Check whether the backedge-taken count can be losslessly cast to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType());
const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy)));
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
}
// If the backedge is guarded by a comparison with the pre-inc value,
// the addrec is safe. Also, if the entry is guarded by a comparison
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRange(Step).getUnsignedMax());
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
AR->getPostIncExpr(*this), N))) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRange(Step).getSignedMin());
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
AR->getPostIncExpr(*this), N))) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
}
}
}
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
static const SCEV *getOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
if (SE->isKnownPositive(Step)) {
*Pred = ICmpInst::ICMP_SLT;
return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
SE->getSignedRange(Step).getSignedMax());
}
if (SE->isKnownNegative(Step)) {
*Pred = ICmpInst::ICMP_SGT;
return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
SE->getSignedRange(Step).getSignedMin());
}
return 0;
}
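// Illustrative use of getOverflowLimitForStep (a hypothetical sketch, guarded
// out so it is not compiled; it assumes a Step with signed range [1,3] in i8).
// The limit is INT8_MIN - 3, which wraps to 125, with predicate SLT: any
// recurrence value v with v <s 125 can grow by at most 3 without exceeding
// INT8_MAX.
#if 0
static void exampleOverflowLimit(ScalarEvolution *SE, const SCEV *Step) {
  ICmpInst::Predicate Pred;
  // For the assumed i8 Step in [1,3], Limit is the i8 constant 125 and
  // Pred is ICmpInst::ICMP_SLT.
  const SCEV *Limit = getOverflowLimitForStep(Step, &Pred, SE);
  (void)Limit;
}
#endif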
// The recurrence AR has been shown to have no signed wrap. Typically, if we
// can prove NSW for AR, then we can just as easily prove NSW for its
// preincrement or postincrement sibling. This allows normalizing a
// sign-extended AddRec as such:
//   {sext(Step + Start),+,Step} => {(Step + sext(Start)),+,Step}
// As a result, the expression "Step + sext(PreIncAR)" is congruent with
// "sext(PostIncAR)".
static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
Type *Ty,
ScalarEvolution *SE) {
const Loop *L = AR->getLoop();
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*SE);
// Check for a simple-looking step prior to loop entry.
const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
if (!SA)
return 0;
// Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
// subtraction is expensive. For this purpose, perform a quick and dirty
// difference, by checking for Step in the operand list.
SmallVector<const SCEV *, 4> DiffOps;
for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
I != E; ++I) {
if (*I != Step)
DiffOps.push_back(*I);
}
if (DiffOps.size() == SA->getNumOperands())
return 0;
// This is a postinc AR. Check for overflow on the preinc recurrence using the
// same three conditions that getSignExtendExpr checks.
// 1. NSW flags on the step increment.
const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
return PreStart;
// 2. Direct overflow check on the step operation's expression.
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
SE->getSignExtendExpr(Step, WideTy));
if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
// Cache knowledge of PreAR NSW.
if (PreAR)
const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
// FIXME: this optimization needs a unit test
DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
return PreStart;
}
// 3. Loop precondition.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
if (OverflowLimit &&
SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
return PreStart;
}
return 0;
}
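// Worked example (hypothetical values): for the post-increment recurrence
// {x + 1,+,1}<L> whose pre-increment sibling {x,+,1}<L> is known NSW,
// PreStart is x, so getSignExtendAddRecStart below yields sext(1) + sext(x);
// the sign extension is pushed through the start without a full SCEV
// subtraction.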
// Get the normalized sign-extended expression for this AddRec's Start.
static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
Type *Ty,
ScalarEvolution *SE) {
const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
if (!PreStart)
return SE->getSignExtendExpr(AR->getStart(), Ty);
return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
SE->getSignExtendExpr(PreStart, Ty));
}
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
ID.AddInteger(scSignExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// If the input value is provably positive, build a zext instead.
if (isKnownNonNegative(Op))
return getZeroExtendExpr(Op, Ty);
// sext(trunc(x)) --> sext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
// It's possible the bits taken off by the truncate were all sign bits. If
// so, we should be able to simplify this further.
const SCEV *X = ST->getOperand();
ConstantRange CR = getSignedRange(X);
unsigned TruncBits = getTypeSizeInBits(ST->getType());
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).signExtend(NewBits).contains(
CR.sextOrTrunc(NewBits)))
return getTruncateOrSignExtend(X, Ty);
}
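// For illustration (hypothetical ranges): if X is an i32 value with signed
// range [-100, 100], then sext_i32(trunc_i8(X)) == X; truncating to 8 bits
// and sign-extending back cannot leave [-128, 127], so the containment check
// succeeds and the fold returns X itself.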
// If the input value is a chrec scev, and we can prove that the value
// did not overflow in the old, smaller type, we can sign extend all of the
// operands (often constants). This allows analysis of something like
// this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
// Check whether the backedge-taken count can be losslessly cast to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType());
const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy)));
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getZeroExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
}
// If the backedge is guarded by a comparison with the pre-inc value
// the addrec is safe. Also, if the entry is guarded by a comparison
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
(isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
OverflowLimit)))) {
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
}
}
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
///
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
// Sign-extend negative constants.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
if (SC->getValue()->getValue().isNegative())
return getSignExtendExpr(Op, Ty);
// Peel off a truncate cast.
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
const SCEV *NewOp = T->getOperand();
if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
return getAnyExtendExpr(NewOp, Ty);
return getTruncateOrNoop(NewOp, Ty);
}
// Next try a zext cast. If the cast is folded, use it.
const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
if (!isa<SCEVZeroExtendExpr>(ZExt))
return ZExt;
// Next try a sext cast. If the cast is folded, use it.
const SCEV *SExt = getSignExtendExpr(Op, Ty);
if (!isa<SCEVSignExtendExpr>(SExt))
return SExt;
// Force the cast to be folded into the operands of an addrec.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Ops;
for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I)
Ops.push_back(getAnyExtendExpr(*I, Ty));
return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
}
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
// Absent any other information, use the zext cast value.
return ZExt;
}
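// For illustration: getAnyExtendExpr(-1 (i8), i32) takes the sign-extension
// path for negative constants and folds to -1 (i32), whereas a provably
// unsigned quantity would fold through the zext path instead.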
/// CollectAddOperandsWithScales - Process the given Ops list, which is
/// a list of operands to be added under the given scale, update the given
/// map. This is a helper function for getAddExpr. As an example of
/// what it does, given a sequence of operands that would form an add
/// expression like this:
///
/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
///
/// and add 13 + A*B*29 to AccumulatedConstant.
/// This will allow getAddExpr to produce this:
///
/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
///
/// This form often exposes folding opportunities that are hidden in
/// the original operand list.
///
/// Return true iff it appears that any interesting folding opportunities
/// may be exposed. This helps getAddExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
///
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
SmallVectorImpl<const SCEV *> &NewOps,
APInt &AccumulatedConstant,
const SCEV *const *Ops, size_t NumOperands,
const APInt &Scale,
ScalarEvolution &SE) {
bool Interesting = false;
// Iterate over the add operands. They are sorted, with constants first.
unsigned i = 0;
while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
++i;
// Pull a buried constant out to the outside.
if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
Interesting = true;
AccumulatedConstant += Scale * C->getValue()->getValue();
}
// Next comes everything else. We're especially interested in multiplies
// here, but they're in the middle, so just visit the rest with one loop.
for (; i != NumOperands; ++i) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
APInt NewScale =
Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
// A multiplication of a constant with another add; recurse.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
Interesting |=
CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Add->op_begin(), Add->getNumOperands(),
NewScale, SE);
} else {
// A multiplication of a constant with some other value. Update
// the map.
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
const SCEV *Key = SE.getMulExpr(MulOps);
std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert(std::make_pair(Key, NewScale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
Pair.first->second += NewScale;
// The map already had an entry for this value, which may indicate
// a folding opportunity.
Interesting = true;
}
}
} else {
// An ordinary operand. Update the map.
std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert(std::make_pair(Ops[i], Scale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
Pair.first->second += Scale;
// The map already had an entry for this value, which may indicate
// a folding opportunity.
Interesting = true;
}
}
}
return Interesting;
}
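// Concrete instance of the example in the comment above, with A = 2, B = 3:
//   m + n + 13 + (2 * (o + p + (3*q + m + 29))) + r + (-1 * r)
// produces the map (m,7), (n,1), (o,2), (p,2), (q,6), (r,0), since
// 1 + A*B == 7 and A*B == 6, and AccumulatedConstant becomes 13 + 6*29 == 187.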
namespace {
struct APIntCompare {
bool operator()(const APInt &LHS, const APInt &RHS) const {
return LHS.ult(RHS);
}
};
}
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVAddExpr operand types don't match!");
#endif
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
// And vice-versa.
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
bool All = true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
E = Ops.end(); I != E; ++I)
if (!isKnownNonNegative(*I)) {
All = false;
break;
}
if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
}
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
Ops[0] = getConstant(LHSC->getValue()->getValue() +
RHSC->getValue()->getValue());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we are left with a constant zero being added, strip it off.
if (LHSC->getValue()->isZero()) {
Ops.erase(Ops.begin());
--Idx;
}
if (Ops.size() == 1) return Ops[0];
}
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them into a multiply expression. Since we
// sorted the list, these values are required to be adjacent.
Type *Ty = Ops[0]->getType();
bool FoundMatch = false;
for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// Scan ahead to count how many equal operands there are.
unsigned Count = 2;
while (i+Count != e && Ops[i+Count] == Ops[i])
++Count;
// Merge the values into a multiply.
const SCEV *Scale = getConstant(Ty, Count);
const SCEV *Mul = getMulExpr(Scale, Ops[i]);
if (Ops.size() == Count)
return Mul;
Ops[i] = Mul;
Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
--i; e -= Count - 1;
FoundMatch = true;
}
if (FoundMatch)
return getAddExpr(Ops, Flags);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
// folded, e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
// if the contents of the resulting outer trunc fold to something simple.
for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
Type *DstType = Trunc->getType();
Type *SrcType = Trunc->getOperand()->getType();
SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
if (T->getOperand()->getType() != SrcType) {
Ok = false;
break;
}
LargeOps.push_back(T->getOperand());
} else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
LargeOps.push_back(getAnyExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
SmallVector<const SCEV *, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
if (T->getOperand()->getType() != SrcType) {
Ok = false;
break;
}
LargeMulOps.push_back(T->getOperand());
} else if (const SCEVConstant *C =
dyn_cast<SCEVConstant>(M->getOperand(j))) {
LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
} else {
Ok = false;
break;
}
}
if (Ok)
LargeOps.push_back(getMulExpr(LargeMulOps));
} else {
Ok = false;
break;
}
}
if (Ok) {
// Evaluate the expression in the larger type.
const SCEV *Fold = getAddExpr(LargeOps, Flags);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, DstType);
}
}
// Skip past any other cast SCEVs.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
++Idx;
// If there are add operands, they would be next.
if (Idx < Ops.size()) {
bool DeletedAdd = false;
while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
// If we have an add, expand the add operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Add->op_begin(), Add->op_end());
DeletedAdd = true;
}
// If we deleted at least one add, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
// any operands we just acquired.
if (DeletedAdd)
return getAddExpr(Ops);
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
// Check to see if there are any folding opportunities present with
// operands multiplied by constant values.
if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
uint64_t BitWidth = getTypeSizeInBits(Ty);
DenseMap<const SCEV *, APInt> M;
SmallVector<const SCEV *, 8> NewOps;
APInt AccumulatedConstant(BitWidth, 0);
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops.data(), Ops.size(),
APInt(BitWidth, 1), *this)) {
// Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
Ops.clear();
if (AccumulatedConstant != 0)
Ops.push_back(getConstant(AccumulatedConstant));
for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
if (I->first != 0)
Ops.push_back(getMulExpr(getConstant(I->first),
getAddExpr(I->second)));
if (Ops.empty())
return getConstant(Ty, 0);
if (Ops.size() == 1)
return Ops[0];
return getAddExpr(Ops);
}
}
// If we are adding something to a multiply expression, make sure the
// something is not already an operand of the multiply. If so, merge it into
// the multiply.
for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
if (isa<SCEVConstant>(MulOpSCEV))
continue;
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
if (MulOpSCEV == Ops[AddOp]) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps);
}
const SCEV *One = getConstant(Ty, 1);
const SCEV *AddOne = getAddExpr(One, InnerMul);
const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
Ops.erase(Ops.begin()+Idx-1);
} else {
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+AddOp-1);
}
Ops.push_back(OuterMul);
return getAddExpr(Ops);
}
// Check this multiply against other multiplies being added together.
for (unsigned OtherMulIdx = Idx+1;
OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
++OtherMulIdx) {
const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
// If MulOp occurs in OtherMul, we can fold the two multiplies
// together.
for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
OMulOp != e; ++OMulOp)
if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul1 = getMulExpr(MulOps);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_begin()+OMulOp);
MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
InnerMul2 = getMulExpr(MulOps);
}
const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
Ops.push_back(OuterMul);
return getAddExpr(Ops);
}
}
}
}
// If there are any add recurrences in the operands list, see if any other
// added values are loop invariant. If so, we can fold them into the
// recurrence.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
++Idx;
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isLoopInvariant(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
}
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
AddRec->op_end());
AddRecOps[0] = getAddExpr(LIOps);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
// Always propagate NW.
Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
// Otherwise, add the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
break;
}
return getAddExpr(Ops);
}
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx)
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
AddRec->op_end());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx)
if (const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
if (OtherAddRec->getLoop() == AddRecLoop) {
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
if (i >= AddRecOps.size()) {
AddRecOps.append(OtherAddRec->op_begin()+i,
OtherAddRec->op_end());
break;
}
AddRecOps[i] = getAddExpr(AddRecOps[i],
OtherAddRec->getOperand(i));
}
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
return getAddExpr(Ops);
}
// Otherwise we couldn't fold anything into this recurrence. Move on to the
// next one.
}
// Okay, it looks like we really DO need an add expr. Check to see if we
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
SCEVAddExpr *S =
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
}
S->setNoWrapFlags(Flags);
return S;
}
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
uint64_t k = i*j;
if (j > 1 && k / j != i) Overflow = true;
return k;
}
/// Compute the result of "n choose k", the binomial coefficient. If an
/// intermediate computation overflows, Overflow will be set and the returned
/// value will be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
// We use the multiplicative formula:
//   n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1.
// At each iteration, we multiply in the next term of the numerator and divide
// by the next term of the denominator. This division always produces an
// integral result, and helps reduce the chance of overflow in the
// intermediate computations. However, we can still overflow even when the
// final result would fit.
if (n == 0 || n == k) return 1;
if (k > n) return 0;
if (k > n/2)
k = n-k;
uint64_t r = 1;
for (uint64_t i = 1; i <= k; ++i) {
r = umul_ov(r, n-(i-1), Overflow);
r /= i;
}
return r;
}
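// Sketch of Choose in action (guarded out; not exercised by this file):
// Choose(6, 2) performs r = 6/1 = 6, then r = (6*5)/2 = 15, matching
// C(6,2) == 15, and leaves Overflow untouched for such small inputs.
#if 0
static void exampleChoose() {
  bool Overflow = false;
  uint64_t R = Choose(6, 2, Overflow);
  assert(R == 15 && !Overflow);
}
#endif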
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVMulExpr operand types don't match!");
#endif
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
// And vice-versa.
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
bool All = true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
E = Ops.end(); I != E; ++I)
if (!isKnownNonNegative(*I)) {
All = false;
break;
}
if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
}
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
// C1*(C2+V) -> C1*C2 + C1*V
if (Ops.size() == 2)
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
if (Add->getNumOperands() == 2 &&
isa<SCEVConstant>(Add->getOperand(0)))
return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
getMulExpr(LHSC, Add->getOperand(1)));
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(getContext(),
LHSC->getValue()->getValue() *
RHSC->getValue()->getValue());
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we are left with a constant one being multiplied, strip it off.
if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
Ops.erase(Ops.begin());
--Idx;
} else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
// If we have a multiply of zero, it will always be zero.
return Ops[0];
} else if (Ops[0]->isAllOnesValue()) {
// If we have a mul by -1 of an add, try distributing the -1 among the
// add operands.
if (Ops.size() == 2) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
E = Add->op_end(); I != E; ++I) {
const SCEV *Mul = getMulExpr(Ops[0], *I);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
return getAddExpr(NewOps);
}
else if (const SCEVAddRecExpr *
AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
E = AddRec->op_end(); I != E; ++I) {
Operands.push_back(getMulExpr(Ops[0], *I));
}
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
}
}
}
if (Ops.size() == 1)
return Ops[0];
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
// If there are mul operands, inline them all into this expression.
if (Idx < Ops.size()) {
bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
// If we have a mul, expand the mul operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Mul->op_begin(), Mul->op_end());
DeletedMul = true;
}
// If we deleted at least one mul, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
// any operands we just acquired.
if (DeletedMul)
return getMulExpr(Ops);
}
// If there are any add recurrences in the operands list, see if any other
// added values are loop invariant. If so, we can fold them into the
// recurrence.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
++Idx;
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isLoopInvariant(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
}
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
const SCEV *Scale = getMulExpr(LIOps);
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
//
// No self-wrap cannot be guaranteed after changing the step size, but
// will be inferred if either NUW or NSW is true.
Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
// Otherwise, multiply the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
break;
}
return getMulExpr(Ops);
}
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// multiplied together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
continue;
// {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
// = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
// choose(x, 2x-y)*choose(2x-y, x-z)*A_{y-z}*B_z
// ]]],+,...up to x=2n}.
// Note that the arguments to choose() are always integers with values
// known at compile time, never SCEV objects.
//
// The implementation avoids pointless extra computations when the two
// addrec's are of different length (mathematically, it's equivalent to
// an infinite stream of zeros on the right).
bool OpsModified = false;
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
bool Overflow = false;
Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
const SCEV *Term = getConstant(Ty, 0);
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
z < ze && !Overflow; ++z) {
uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
uint64_t Coeff;
if (LargerThan64Bits)
Coeff = umul_ov(Coeff1, Coeff2, Overflow);
else
Coeff = Coeff1*Coeff2;
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
}
}
AddRecOps.push_back(Term);
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
OpsModified = true;
AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
if (!AddRec)
break;
}
}
if (OpsModified)
return getMulExpr(Ops);
}
// Otherwise we couldn't fold anything into this recurrence. Move on to the
// next one.
}
// Okay, it looks like we really DO need a mul expr. Check to see if we
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
}
S->setNoWrapFlags(Flags);
return S;
}
/// getUDivExpr - Get a canonical unsigned division expression, or something
/// simpler if possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(getEffectiveSCEVType(LHS->getType()) ==
getEffectiveSCEVType(RHS->getType()) &&
"SCEVUDivExpr operand types don't match!");
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (RHSC->getValue()->equalsInt(1))
return LHS; // X udiv 1 --> x
// If the denominator is zero, the result of the udiv is undefined. Don't
// try to analyze it, because the resolution chosen here may differ from
// the resolution chosen in other parts of the compiler.
if (!RHSC->getValue()->isZero()) {
// Determine whether the division can be folded into the operands of
// its left-hand operand.
// TODO: Generalize this to non-constants by using known-bits information.
Type *Ty = LHS->getType();
unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getValue()->getValue().isPowerOf2())
++MaxShiftAmt;
IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
const APInt &StepInt = Step->getValue()->getValue();
const APInt &DivInt = RHSC->getValue()->getValue();
if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
return getAddRecExpr(Operands, AR->getLoop(),
SCEV::FlagNW);
}
// Get a canonical UDivExpr for a recurrence.
// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
// We can currently only fold X%N if X is constant.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
if (StartC && !DivInt.urem(StepInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
const APInt &StartInt = StartC->getValue()->getValue();
const APInt &StartRem = StartInt.urem(StepInt);
if (StartRem != 0)
LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
AR->getLoop(), SCEV::FlagNW);
}
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
const SCEV *Op = M->getOperand(i);
const SCEV *Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
M->op_end());
Operands[i] = Div;
return getMulExpr(Operands);
}
}
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
if (isa<SCEVUDivExpr>(Op) ||
getMulExpr(Op, RHS) != A->getOperand(i))
break;
Operands.push_back(Op);
}
if (Operands.size() == A->getNumOperands())
return getAddExpr(Operands);
}
}
// Fold if both operands are constant.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
Constant *LHSCV = LHSC->getValue();
Constant *RHSCV = RHSC->getValue();
return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
RHSCV)));
}
}
}
FoldingSetNodeID ID;
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
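// For illustration (hypothetical operands): {0,+,4}<L> udiv 2 folds to
// {0,+,2}<L> via the {X,+,N}/C rule when the zero-extension comparison proves
// the division exact, and (8*x) udiv 4 folds to 2*x via the (A*B)/C rule once
// an operand is found to be safely divisible.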
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
const Loop *L,
SCEV::NoWrapFlags Flags) {
SmallVector<const SCEV *, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
Operands.append(StepChrec->op_begin(), StepChrec->op_end());
return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
}
Operands.push_back(Step);
return getAddRecExpr(Operands, L, Flags);
}
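// For illustration: if Step is itself a recurrence {Y,+,Z}<L> on the same
// loop, the branch above flattens getAddRecExpr(X, {Y,+,Z}<L>, L) into the
// single recurrence {X,+,Y,+,Z}<L> instead of nesting one addrec inside
// another.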
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
const Loop *L, SCEV::NoWrapFlags Flags) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
assert(isLoopInvariant(Operands[i], L) &&
"SCEVAddRecExpr operand is not loop-invariant!");
#endif
if (Operands.back()->isZero()) {
Operands.pop_back();
return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
}
// It's tempting to call getMaxBackedgeTakenCount here and
// use that information to infer NUW and NSW flags. However, computing a
// BE count requires calling getAddRecExpr, so we may not yet have a
// meaningful BE count at this point (and if we don't, we'd be stuck
// with a SCEVCouldNotCompute as the cached BE count).
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
// And vice-versa.
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
bool All = true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
E = Operands.end(); I != E; ++I)
if (!isKnownNonNegative(*I)) {
All = false;
break;
}
if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
}
// Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
if (L->contains(NestedLoop) ?
(L->getLoopDepth() < NestedLoop->getLoopDepth()) :
(!NestedLoop->contains(L) &&
DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
// loops. Don't perform this transformation if it would break this
// requirement.
bool AllInvariant = true;
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (!isLoopInvariant(Operands[i], L)) {
AllInvariant = false;
break;
}
if (AllInvariant) {
// Create a recurrence for the outer loop with the same step size.
//
// The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
// inner recurrence has the same property.
SCEV::NoWrapFlags OuterFlags =
maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
AllInvariant = true;
for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
AllInvariant = false;
break;
}
if (AllInvariant) {
// Ok, both add recurrences are valid after the transformation.
//
// The inner recurrence keeps its NW flag but only keeps NUW/NSW if
// the outer recurrence has the same property.
SCEV::NoWrapFlags InnerFlags =
maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
}
}
// Reset Operands to its original state.
Operands[0] = NestedAR;
}
}
// Okay, it looks like we really DO need an addrec expr. Check to see if we
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
ID.AddPointer(Operands[i]);
ID.AddPointer(L);
void *IP = 0;
SCEVAddRecExpr *S =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
std::uninitialized_copy(Operands.begin(), Operands.end(), O);
S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
O, Operands.size(), L);
UniqueSCEVs.InsertNode(S, IP);
}
S->setNoWrapFlags(Flags);
return S;
}
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getSMaxExpr(Ops);
}
const SCEV *
ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVSMaxExpr operand types don't match!");
#endif
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(getContext(),
APIntOps::smax(LHSC->getValue()->getValue(),
RHSC->getValue()->getValue()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we are left with a constant minimum-int, strip it off.
if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
Ops.erase(Ops.begin());
--Idx;
} else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
// If we have an smax with a constant maximum-int, it will always be
// maximum-int.
return Ops[0];
}
if (Ops.size() == 1) return Ops[0];
}
// Find the first SMax
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
++Idx;
// Check to see if one of the operands is an SMax. If so, expand its operands
// onto our operand list, and recurse to simplify.
if (Idx < Ops.size()) {
bool DeletedSMax = false;
while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
Ops.erase(Ops.begin()+Idx);
Ops.append(SMax->op_begin(), SMax->op_end());
DeletedSMax = true;
}
if (DeletedSMax)
return getSMaxExpr(Ops);
}
// Okay, check to see if the same value occurs in the operand list twice. If
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
// X smax Y smax Y --> X smax Y
// X smax Y --> X, if X is always greater than Y
if (Ops[i] == Ops[i+1] ||
isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
--i; --e;
} else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--i; --e;
}
if (Ops.size() == 1) return Ops[0];
assert(!Ops.empty() && "Reduced smax down to nothing!");
// Okay, it looks like we really DO need an smax expr. Check to see if we
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scSMaxExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getUMaxExpr(Ops);
}
const SCEV *
ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVUMaxExpr operand types don't match!");
#endif
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(getContext(),
APIntOps::umax(LHSC->getValue()->getValue(),
RHSC->getValue()->getValue()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we are left with a constant minimum-int, strip it off.
if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
Ops.erase(Ops.begin());
--Idx;
} else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
// If we have an umax with a constant maximum-int, it will always be
// maximum-int.
return Ops[0];
}
if (Ops.size() == 1) return Ops[0];
}
// Find the first UMax
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
++Idx;
// Check to see if one of the operands is a UMax. If so, expand its operands
// onto our operand list, and recurse to simplify.
if (Idx < Ops.size()) {
bool DeletedUMax = false;
while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
Ops.erase(Ops.begin()+Idx);
Ops.append(UMax->op_begin(), UMax->op_end());
DeletedUMax = true;
}
if (DeletedUMax)
return getUMaxExpr(Ops);
}
// Okay, check to see if the same value occurs in the operand list twice. If
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
// X umax Y umax Y --> X umax Y
// X umax Y --> X, if X is always greater than Y
if (Ops[i] == Ops[i+1] ||
isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
--i; --e;
} else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--i; --e;
}
if (Ops.size() == 1) return Ops[0];
assert(!Ops.empty() && "Reduced umax down to nothing!");
// Okay, it looks like we really DO need a umax expr. Check to see if we
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scUMaxExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
const SCEV *RHS) {
// ~smax(~x, ~y) == smin(x, y).
return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
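// Derivation of the identity (using ~a == -a-1 in two's complement):
// smax(~x, ~y) == smax(-x-1, -y-1) == -smin(x, y) - 1, and applying ~ once
// more yields smin(x, y). getUMinExpr below is the unsigned analogue.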
const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
const SCEV *RHS) {
// ~umax(~x, ~y) == umin(x, y)
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
assert(Ty == IntTy && "Effective SCEV type doesn't match");
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD) {
return getConstant(IntTy,
TD->getStructLayout(STy)->getElementOffset(FieldNo));
}
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
// Don't attempt to do anything other than create a SCEVUnknown object
// here. createSCEV only calls getUnknown after checking for all other
// interesting possibilities, and any other code that calls getUnknown
// is doing so in order to hide a value from SCEV canonicalization.
FoldingSetNodeID ID;
ID.AddInteger(scUnknown);
ID.AddPointer(V);
void *IP = 0;
if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
assert(cast<SCEVUnknown>(S)->getValue() == V &&
"Stale SCEVUnknown in uniquing map!");
return S;
}
SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
FirstUnknown);
FirstUnknown = cast<SCEVUnknown>(S);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
//===----------------------------------------------------------------------===//
// Basic SCEV Analysis and PHI Idiom Recognition Code
//
/// isSCEVable - Test if values of the given type are analyzable within
/// the SCEV framework. This includes integer types as well as pointer
/// types, which are modeled as pointer-width integers (see
/// getEffectiveSCEVType).
bool ScalarEvolution::isSCEVable(Type *Ty) const {
// Integers and pointers are always SCEVable.
return Ty->isIntegerTy() || Ty->isPointerTy();
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
/// for which isSCEVable must return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
// If we have a DataLayout, use it!
if (TD)
return TD->getTypeSizeInBits(Ty);
// Integer types have fixed sizes.
if (Ty->isIntegerTy())
return Ty->getPrimitiveSizeInBits();
// The only other supported type is pointer. Without DataLayout, conservatively
// assume pointers are 64-bit.
assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
return 64;
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
/// the given type and which represents how SCEV will treat the given
/// type, for which isSCEVable must return true. For pointer types,
/// this is the pointer-sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
if (Ty->isIntegerTy()) {
return Ty;
}
// The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
if (TD)
return TD->getIntPtrType(Ty);
// Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
return &CouldNotCompute;
}
namespace {
// Helper class working with SCEVTraversal to figure out if a SCEV contains
// a SCEVUnknown with a null value-pointer. FindInvalidSCEVUnknown::FindOne
// is set iff we find such a SCEVUnknown.
//
struct FindInvalidSCEVUnknown {
bool FindOne;
FindInvalidSCEVUnknown() { FindOne = false; }
bool follow(const SCEV *S) {
switch (S->getSCEVType()) {
case scConstant:
return false;
case scUnknown:
if (!cast<SCEVUnknown>(S)->getValue())
FindOne = true;
return false;
default:
return true;
}
}
bool isDone() const { return FindOne; }
};
}
bool ScalarEvolution::checkValidity(const SCEV *S) const {
FindInvalidSCEVUnknown F;
SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
ST.visitAll(S);
return !F.FindOne;
}
/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
/// expression and create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
else
ValueExprMap.erase(I);
}
const SCEV *S = createSCEV(V);
// The process of creating a SCEV for V may have caused other SCEVs
// to have been created, so it's necessary to insert the new entry
// from scratch, rather than trying to remember the insert position
// above.
ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
return S;
}
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMulExpr(V,
getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
const SCEV *AllOnes =
getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
return getMinusSCEV(AllOnes, V);
}
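// Worked example (illustrative only; helper name is hypothetical): in two's
// complement arithmetic -V == (-1)*V and ~V == -1 - V, which is exactly how
// getNegativeSCEV and getNotSCEV build their expressions above.
static void exampleNegAndNotIdentities(int V) {
  // Holds whenever -V is representable in the type.
  assert(-1 * V == -V && "negation is multiplication by -1");
  assert(-1 - V == ~V && "bitwise not is -1 minus V");
  (void)V;
}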
/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags) {
assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
// Fast path: X - X --> 0.
if (LHS == RHS)
return getConstant(LHS->getType(), 0);
// X - Y --> X + -Y
return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
}
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended.
const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
return getTruncateExpr(V, Ty);
return getZeroExtendExpr(V, Ty);
}
/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is sign
/// extended.
const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
Type *Ty) {
Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
return getTruncateExpr(V, Ty);
return getSignExtendExpr(V, Ty);
}
/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended. The conversion must not be narrowing.
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getZeroExtendExpr(V, Ty);
}
/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is sign
/// extended. The conversion must not be narrowing.
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getSignExtendExpr(V, Ty);
}
/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
/// the input value to the specified type. If the type must be extended,
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getAnyExtendExpr(V, Ty);
}
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be widening.
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getTruncateExpr(V, Ty);
}
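// Usage sketch (hypothetical client code): widening an iteration count to a
// wider type would typically go through one of the helpers above.
static const SCEV *exampleWidenCount(ScalarEvolution &SE, const SCEV *Count,
                                     Type *WideTy) {
  // Zero-extension is the right choice for iteration counts, which are
  // unsigned quantities; the helper is a no-op when the widths already match
  // and asserts that the conversion never narrows.
  return SE.getNoopOrZeroExtend(Count, WideTy);
}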
/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
/// the types using zero-extension, and then perform a umax operation
/// with them.
const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
const SCEV *PromotedLHS = LHS;
const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
else
PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
return getUMaxExpr(PromotedLHS, PromotedRHS);
}
/// getUMinFromMismatchedTypes - Promote the operands to the wider of
/// the types using zero-extension, and then perform a umin operation
/// with them.
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
const SCEV *PromotedLHS = LHS;
const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
else
PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
return getUMinExpr(PromotedLHS, PromotedRHS);
}
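// Illustrative sketch (hypothetical helper): a loop exits via whichever
// condition trips first, so exit counts of possibly different widths can be
// combined with the mismatched-type umin above.
static const SCEV *exampleCombineExitCounts(ScalarEvolution &SE,
                                            const SCEV *CountA,
                                            const SCEV *CountB) {
  // An i32 count and an i64 count are reconciled by zero-extending the
  // narrower operand before the umin is built.
  return SE.getUMinFromMismatchedTypes(CountA, CountB);
}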
/// getPointerBase - Transitively follow the chain of pointer-type operands
/// until reaching a SCEV that does not have a single pointer operand. This
/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
/// but corner cases do exist.
const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
// A pointer operand may evaluate to a nonpointer expression, such as null.
if (!V->getType()->isPointerTy())
return V;
if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
return getPointerBase(Cast->getOperand());
}
else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
const SCEV *PtrOp = 0;
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
if ((*I)->getType()->isPointerTy()) {
// Cannot find the base of an expression with multiple pointer operands.
if (PtrOp)
return V;
PtrOp = *I;
}
}
if (!PtrOp)
return V;
return getPointerBase(PtrOp);
}
return V;
}
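// Usage sketch (hypothetical helper): a client can test whether two addresses
// are derived from the same underlying object by comparing pointer bases.
static bool exampleSameBase(ScalarEvolution &SE, Value *P, Value *Q) {
  const SCEV *BaseP = SE.getPointerBase(SE.getSCEV(P));
  const SCEV *BaseQ = SE.getPointerBase(SE.getSCEV(Q));
  // Per the comment above, only a SCEVUnknown base identifies a concrete
  // object; other results are the documented corner cases.
  return isa<SCEVUnknown>(BaseP) && BaseP == BaseQ;
}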
/// PushDefUseChildren - Push users of the given Instruction
/// onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
SmallVectorImpl<Instruction *> &Worklist) {
// Push the def-use children onto the Worklist stack.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
Worklist.push_back(cast<Instruction>(*UI));
}
/// ForgetSymbolicName - This looks up computed SCEV values for all
/// instructions that depend on the given instruction and removes them from
/// the ValueExprMap if they reference SymName. This is used during PHI
/// resolution.
void
ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
PushDefUseChildren(PN, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
if (Old != SymName && !hasOperand(Old, SymName))
continue;
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, it's a PHI that's in the process of being computed
// by createNodeForPHI, or it's a single-value PHI. In the first case,
// additional loop trip count information isn't going to change anything.
// In the second case, createNodeForPHI will perform the necessary
// updates on its own when it gets to that point. In the third, we do
// want to forget the SCEVUnknown.
if (!isa<PHINode>(I) ||
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
forgetMemoizedResults(Old);
ValueExprMap.erase(It);
}
}
PushDefUseChildren(I, Worklist);
}
}
/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
/// a loop header, making it a potential recurrence, or it doesn't.
///
const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (const Loop *L = LI->getLoopFor(PN->getParent()))
if (L->getHeader() == PN->getParent()) {
// The loop may have multiple entry edges or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
Value *BEValueV = 0, *StartValueV = 0;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
BEValueV = 0;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
StartValueV = 0;
break;
}
}
if (BEValueV && StartValueV) {
// While we are analyzing this PHI node, handle its value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
"PHI node already processed?");
ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
const SCEV *BEValue = getSCEV(BEValueV);
// NOTE: If BEValue is loop invariant, we know that the PHI node just
// has a special value for the first iteration of the loop.
// If the value coming around the backedge is an add with the symbolic
// value we just inserted, then we found a simple induction variable!
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
// If there is a single occurrence of the symbolic value, replace it
// with a recurrence.
unsigned FoundIndex = Add->getNumOperands();
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (Add->getOperand(i) == SymbolicName)
if (FoundIndex == e) {
FoundIndex = i;
break;
}
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
const SCEV *Accum = getAddExpr(Ops);
// This is not a valid addrec if the step amount is varying each
// loop iteration, but is not itself an addrec in this loop.
if (isLoopInvariant(Accum, L) ||
(isa<SCEVAddRecExpr>(Accum) &&
cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
// If the increment doesn't overflow, then neither the addrec nor
// the post-increment will overflow.
if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
if (OBO->hasNoUnsignedWrap())
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
// pointer. We can guarantee that no unsigned wrap occurs if the
// indices form a positive value.
if (GEP->isInBounds()) {
Flags = setFlags(Flags, SCEV::FlagNW);
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
} else if (const SubOperator *OBO =
dyn_cast<SubOperator>(BEValueV)) {
if (OBO->hasNoUnsignedWrap())
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
}
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
// Since the no-wrap flags are on the increment, they apply to the
// post-incremented value as well.
if (isLoopInvariant(Accum, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum),
Accum, L, Flags);
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
ForgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
} else if (const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(BEValue)) {
// Otherwise, this could be a loop like this:
// i = 0; for (j = 1; ..; ++j) { .... i = j; }
// In this case, j = {1,+,1} and BEValue is j.
// Because the other in-value of i (0) fits the evolution of BEValue,
// i really is an addrec evolution.
if (AddRec->getLoop() == L && AddRec->isAffine()) {
const SCEV *StartVal = getSCEV(StartValueV);
// If StartVal = j.start - j.stride, we can use StartVal as the
// initial value of the addrec evolution.
if (StartVal == getMinusSCEV(AddRec->getOperand(0),
AddRec->getOperand(1))) {
// FIXME: For constant StartVal, we should be able to infer
// no-wrap flags.
const SCEV *PHISCEV =
getAddRecExpr(StartVal, AddRec->getOperand(1), L,
SCEV::FlagAnyWrap);
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
ForgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
}
}
}
// If the PHI has a single incoming value, follow that value, unless the
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
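// Worked example (illustrative): for the canonical counting loop
//   %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
//   %i.next = add nuw nsw i32 %i, 1
// the backedge value analyzed above is (symbolic-i + 1), so Accum is the
// constant 1 and the PHI is rewritten as the addrec {0,+,1}<nuw><nsw> for
// this loop, with the flags transferred from the add instruction.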
/// createNodeForGEP - Expand GEP instructions into add and multiply
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
if (!Base->getType()->getPointerElementType()->isSized())
return getUnknown(GEP);
// Don't blindly transfer the inbounds flag from the GEP instruction to the
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
E = GEP->op_end();
I != E; ++I) {
Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
}
}
// Get the SCEV for the GEP base.
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
return getAddExpr(BaseS, TotalOffset, Wrap);
}
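// Sketch (hypothetical helper) mirroring the array case above: an index is
// sign-extended to pointer width and scaled by the element size in bytes.
static const SCEV *exampleByteOffset(ScalarEvolution &SE, const SCEV *Index,
                                     uint64_t EltBytes, Type *IntPtrTy) {
  // Getelementptr indices are signed, hence the sign extension.
  const SCEV *Idx = SE.getTruncateOrSignExtend(Index, IntPtrTy);
  return SE.getMulExpr(Idx, SE.getConstant(IntPtrTy, EltBytes));
}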
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
/// guaranteed to end in (at every loop iteration). It is, at the same time,
/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
uint32_t
ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getValue()->getValue().countTrailingZeros();
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
return std::min(GetMinTrailingZeros(T->getOperand()),
(uint32_t)getTypeSizeInBits(T->getType()));
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
getTypeSizeInBits(E->getType()) : OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
getTypeSizeInBits(E->getType()) : OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
// The result is the sum of all operands' results.
uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
BitWidth);
return SumOpRes;
}
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Zeros, Ones);
return Zeros.countTrailingOnes();
}
// SCEVUDivExpr
return 0;
}
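// Worked example (illustrative, hypothetical helper): for the addrec {4,+,8}
// every value has the form 4 + 8*k, so the guaranteed trailing zeros are
// min(tz(4), tz(8)) = min(2, 3) = 2, matching the SCEVAddRecExpr case above.
static unsigned exampleMinTZOfAffineAddRec(const APInt &Start,
                                           const APInt &Step) {
  return std::min(Start.countTrailingZeros(), Step.countTrailingZeros());
}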
/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
///
ConstantRange
ScalarEvolution::getUnsignedRange(const SCEV *S) {
// See if we've computed this range already.
DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
if (I != UnsignedRanges.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
// If the value has known zeros, the maximum unsigned value will have those
// known zeros as well.
uint32_t TZ = GetMinTrailingZeros(S);
if (TZ != 0)
ConservativeResult =
ConstantRange(APInt::getMinValue(BitWidth),
APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
ConstantRange X = getUnsignedRange(Add->getOperand(0));
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.add(getUnsignedRange(Add->getOperand(i)));
return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getUnsignedRange(Mul->getOperand(0));
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getUnsignedRange(SMax->getOperand(0));
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getUnsignedRange(SMax->getOperand(i)));
return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getUnsignedRange(UMax->getOperand(0));
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getUnsignedRange(UMax->getOperand(i)));
return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getUnsignedRange(UDiv->getLHS());
ConstantRange Y = getUnsignedRange(UDiv->getRHS());
return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getUnsignedRange(ZExt->getOperand());
return setUnsignedRange(ZExt,
ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getUnsignedRange(SExt->getOperand());
return setUnsignedRange(SExt,
ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getUnsignedRange(Trunc->getOperand());
return setUnsignedRange(Trunc,
ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no unsigned wrap, the value will never be less than its
// initial value.
if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
ConservativeResult =
ConservativeResult.intersectWith(
ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
// TODO: non-affine addrec
if (AddRec->isAffine()) {
Type *Ty = AddRec->getType();
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*this);
ConstantRange StartRange = getUnsignedRange(Start);
ConstantRange StepRange = getSignedRange(Step);
ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
ConstantRange EndRange =
StartRange.add(MaxBECountRange.multiply(StepRange));
// Check for overflow. This must be done with ConstantRange arithmetic
// because we could be called from within the ScalarEvolution overflow
// checking code.
ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
ConstantRange ExtMaxBECountRange =
MaxBECountRange.zextOrTrunc(BitWidth*2+1);
ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
ExtEndRange)
return setUnsignedRange(AddRec, ConservativeResult);
APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
EndRange.getUnsignedMin());
APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
EndRange.getUnsignedMax());
if (Min.isMinValue() && Max.isMaxValue())
return setUnsignedRange(AddRec, ConservativeResult);
return setUnsignedRange(AddRec,
ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
}
}
return setUnsignedRange(AddRec, ConservativeResult);
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
}
return setUnsignedRange(S, ConservativeResult);
}
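// Usage sketch (hypothetical helper): proving an unsigned index stays in
// bounds from the range computed above.
static bool exampleIndexFitsInBounds(ScalarEvolution &SE, const SCEV *Idx,
                                     uint64_t ArraySize) {
  ConstantRange R = SE.getUnsignedRange(Idx);
  // The result is conservative: a full-set range reports the type's maximum
  // value as its unsigned max, so this test simply fails in that case.
  return R.getUnsignedMax().ult(ArraySize);
}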
/// getSignedRange - Determine the signed range for a particular SCEV.
///
ConstantRange
ScalarEvolution::getSignedRange(const SCEV *S) {
// See if we've computed this range already.
DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
if (I != SignedRanges.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
// If the value has known zeros, the maximum signed value will have those
// known zeros as well.
uint32_t TZ = GetMinTrailingZeros(S);
if (TZ != 0)
ConservativeResult =
ConstantRange(APInt::getSignedMinValue(BitWidth),
APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
ConstantRange X = getSignedRange(Add->getOperand(0));
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.add(getSignedRange(Add->getOperand(i)));
return setSignedRange(Add, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getSignedRange(Mul->getOperand(0));
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getSignedRange(Mul->getOperand(i)));
return setSignedRange(Mul, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getSignedRange(SMax->getOperand(0));
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getSignedRange(SMax->getOperand(i)));
return setSignedRange(SMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getSignedRange(UMax->getOperand(0));
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getSignedRange(UMax->getOperand(i)));
return setSignedRange(UMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getSignedRange(UDiv->getLHS());
ConstantRange Y = getSignedRange(UDiv->getRHS());
return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getSignedRange(ZExt->getOperand());
return setSignedRange(ZExt,
ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getSignedRange(SExt->getOperand());
return setSignedRange(SExt,
ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getSignedRange(Trunc->getOperand());
return setSignedRange(Trunc,
ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
bool AllNonNeg = true;
bool AllNonPos = true;
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
}
if (AllNonNeg)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt(BitWidth, 0),
APInt::getSignedMinValue(BitWidth)));
else if (AllNonPos)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth),
APInt(BitWidth, 1)));
}
// TODO: non-affine addrec
if (AddRec->isAffine()) {
Type *Ty = AddRec->getType();
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*this);
ConstantRange StartRange = getSignedRange(Start);
ConstantRange StepRange = getSignedRange(Step);
ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
ConstantRange EndRange =
StartRange.add(MaxBECountRange.multiply(StepRange));
// Check for overflow. This must be done with ConstantRange arithmetic
// because we could be called from within the ScalarEvolution overflow
// checking code.
ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
ConstantRange ExtMaxBECountRange =
MaxBECountRange.zextOrTrunc(BitWidth*2+1);
ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
ExtEndRange)
return setSignedRange(AddRec, ConservativeResult);
APInt Min = APIntOps::smin(StartRange.getSignedMin(),
EndRange.getSignedMin());
APInt Max = APIntOps::smax(StartRange.getSignedMax(),
EndRange.getSignedMax());
if (Min.isMinSignedValue() && Max.isMaxSignedValue())
return setSignedRange(AddRec, ConservativeResult);
return setSignedRange(AddRec,
ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
}
}
return setSignedRange(AddRec, ConservativeResult);
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
if (!U->getValue()->getType()->isIntegerTy() && !TD)
return setSignedRange(U, ConservativeResult);
unsigned NS = ComputeNumSignBits(U->getValue(), TD);
if (NS <= 1)
return setSignedRange(U, ConservativeResult);
return setSignedRange(U, ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
}
return setSignedRange(S, ConservativeResult);
}
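// Sketch (illustrative, hypothetical helper): the signed range subsumes
// simple sign facts; for instance, a value is known non-negative when its
// signed minimum is not negative.
static bool exampleKnownNonNegative(ScalarEvolution &SE, const SCEV *S) {
  return !SE.getSignedRange(S).getSignedMin().isNegative();
}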
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
unsigned Opcode = Instruction::UserOp1;
if (Instruction *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
// Don't attempt to analyze instructions in blocks that aren't
// reachable. Such instructions don't matter, and they aren't required
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
if (!DT->isReachableFromEntry(I->getParent()))
return getUnknown(V);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
Opcode = CE->getOpcode();
else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
return getConstant(V->getType(), 0);
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
else
return getUnknown(V);
Operator *U = cast<Operator>(V);
switch (Opcode) {
case Instruction::Add: {
// The simple thing to do would be to just call getSCEV on both operands
// and call getAddExpr with the result. However if we're looking at a
// bunch of things all added together, this can be quite inefficient,
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
//
// Don't apply this instruction's NSW or NUW flags to the new
// expression. The instruction may be guarded by control flow that the
// no-wrap behavior depends on. Non-control-equivalent instructions can be
// mapped to the same SCEV expression, and it would be incorrect to transfer
// NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
AddOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
unsigned Opcode = Op->getValueID() - Value::InstructionVal;
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
break;
U = cast<Operator>(Op);
const SCEV *Op1 = getSCEV(U->getOperand(1));
if (Opcode == Instruction::Sub)
AddOps.push_back(getNegativeSCEV(Op1));
else
AddOps.push_back(Op1);
}
AddOps.push_back(getSCEV(U->getOperand(0)));
return getAddExpr(AddOps);
}
case Instruction::Mul: {
// Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
Op->getValueID() == Instruction::Mul + Value::InstructionVal;
Op = U->getOperand(0)) {
U = cast<Operator>(Op);
MulOps.push_back(getSCEV(U->getOperand(1)));
}
MulOps.push_back(getSCEV(U->getOperand(0)));
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Sub:
return getMinusSCEV(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
if (CI->isNullValue())
return getSCEV(U->getOperand(1));
if (CI->isAllOnesValue())
return getSCEV(U->getOperand(0));
const APInt &A = CI->getValue();
// Instcombine's ShrinkDemandedConstant may strip bits out of
// constants, obscuring what would otherwise be a low-bits mask.
// Use ComputeMaskedBits to compute what ShrinkDemandedConstant
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
return
getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
IntegerType::get(getContext(), BitWidth - LZ)),
U->getType());
}
break;
case Instruction::Or:
// If the RHS of the Or is a constant, we may have something like:
// X*4+1 which got turned into X*4|1. Handle this as an Add so loop
// optimizations will transparently handle this case.
//
// In order for this transformation to be safe, the LHS must be of the
// form X*(2^n) and the Or constant must be less than 2^n.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
const SCEV *LHS = getSCEV(U->getOperand(0));
const APInt &CIVal = CI->getValue();
if (GetMinTrailingZeros(LHS) >=
(CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
// Build a plain add SCEV.
const SCEV *S = getAddExpr(LHS, getSCEV(CI));
// If the LHS of the add was an addrec and it has no-wrap flags,
// transfer the no-wrap flags, since an or won't introduce a wrap.
if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
OldAR->getNoWrapFlags());
}
return S;
}
}
break;
case Instruction::Xor:
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
// If the RHS of the xor is a signbit, then this is just an add.
// Instcombine turns add of signbit into xor as a strength reduction step.
if (CI->getValue().isSignBit())
return getAddExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
// If the RHS of xor is -1, then this is a not operation.
if (CI->isAllOnesValue())
return getNotSCEV(getSCEV(U->getOperand(0)));
// Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
// This is a variant of the check for xor with -1, and it handles
// the case where instcombine has trimmed non-demanded bits out
// of an xor with -1.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO->getOpcode() == Instruction::And &&
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
Type *UTy = U->getType();
const SCEV *Z0 = Z->getOperand();
Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
// If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
if (APIntOps::isMask(Z0TySize, CI->getValue()))
return getZeroExtendExpr(getNotSCEV(Z0), UTy);
// If C is a single bit, it may be in the sign-bit position
// before the zero-extend. In this case, represent the xor
// using an add, which is equivalent, and re-apply the zext.
APInt Trunc = CI->getValue().trunc(Z0TySize);
if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
Trunc.isSignBit())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
}
}
break;
case Instruction::Shl:
// Turn shift left by a constant amount into a multiply.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (SA->getValue().uge(BitWidth))
break;
Constant *X = ConstantInt::get(getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
case Instruction::LShr:
// Turn logical shift right by a constant amount into an unsigned divide.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (SA->getValue().uge(BitWidth))
break;
Constant *X = ConstantInt::get(getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
case Instruction::AShr:
// For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
if (L->getOpcode() == Instruction::Shl &&
L->getOperand(1) == U->getOperand(1)) {
uint64_t BitWidth = getTypeSizeInBits(U->getType());
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (CI->getValue().uge(BitWidth))
break;
uint64_t Amt = BitWidth - CI->getZExtValue();
if (Amt == BitWidth)
return getSCEV(L->getOperand(0)); // shift by zero --> noop
return
getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
IntegerType::get(getContext(),
Amt)),
U->getType());
}
break;
case Instruction::Trunc:
return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::ZExt:
return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::SExt:
return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::BitCast:
// BitCasts are no-op casts so we just eliminate the cast.
if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
return getSCEV(U->getOperand(0));
break;
// It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
// lead to pointer expressions which cannot safely be expanded to GEPs,
// because ScalarEvolution doesn't respect the GEP aliasing rules when
// simplifying integer expressions.
case Instruction::GetElementPtr:
return createNodeForGEP(cast<GEPOperator>(U));
case Instruction::PHI:
return createNodeForPHI(cast<PHINode>(U));
case Instruction::Select:
// This could be a smax or umax that was lowered earlier.
// Try to recover it.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
switch (ICI->getPredicate()) {
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
// fall through
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
// a >s b ? b+x : a+x -> smin(a, b)+x
if (LHS->getType() == U->getType()) {
const SCEV *LS = getSCEV(LHS);
const SCEV *RS = getSCEV(RHS);
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, RS);
if (LDiff == RDiff)
return getAddExpr(getSMaxExpr(LS, RS), LDiff);
LDiff = getMinusSCEV(LA, RS);
RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(getSMinExpr(LS, RS), LDiff);
}
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
std::swap(LHS, RHS);
// fall through
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
// a >u b ? b+x : a+x -> umin(a, b)+x
if (LHS->getType() == U->getType()) {
const SCEV *LS = getSCEV(LHS);
const SCEV *RS = getSCEV(RHS);
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, RS);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(LS, RS), LDiff);
LDiff = getMinusSCEV(LA, RS);
RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(getUMinExpr(LS, RS), LDiff);
}
break;
case ICmpInst::ICMP_NE:
// n != 0 ? n+x : 1+x -> umax(n, 1)+x
if (LHS->getType() == U->getType() &&
isa<ConstantInt>(RHS) &&
cast<ConstantInt>(RHS)->isZero()) {
const SCEV *One = getConstant(LHS->getType(), 1);
const SCEV *LS = getSCEV(LHS);
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, One);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
case ICmpInst::ICMP_EQ:
// n == 0 ? 1+x : n+x -> umax(n, 1)+x
if (LHS->getType() == U->getType() &&
isa<ConstantInt>(RHS) &&
cast<ConstantInt>(RHS)->isZero()) {
const SCEV *One = getConstant(LHS->getType(), 1);
const SCEV *LS = getSCEV(LHS);
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, One);
const SCEV *RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
default:
break;
}
}
default: // We cannot analyze this expression.
break;
}
return getUnknown(V);
}
//===----------------------------------------------------------------------===//
// Iteration Count Computation Code
//
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
/// normal unsigned value. Returns 0 if the trip count is unknown or not
/// constant. Will also return 0 if the maximum trip count is very large (>=
/// 2^32).
///
/// This "trip count" assumes that control exits via ExitingBlock. More
/// precisely, it is the number of times that control may reach ExitingBlock
/// before taking the branch. For loops with multiple exits, it may not be the
/// number of times that the loop header executes because the loop may exit
/// prematurely via another branch.
///
/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
/// loop exits. getExitCount() may return an exact count for this branch
/// assuming no-signed-wrap. The number of well-defined iterations may actually
/// be higher than this trip count if this exit test is skipped and the loop
/// exits via a different branch. Ideally, getExitCount() would know whether it
/// depends on a NSW assumption, and we would only fall back to a conservative
/// trip count in that case.
unsigned ScalarEvolution::
getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
if (!ExitCount)
return 0;
ConstantInt *ExitConst = ExitCount->getValue();
// Guard against huge trip counts.
if (ExitConst->getValue().getActiveBits() > 32)
return 0;
// In case of integer overflow, this returns 0, which is correct.
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
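// Worked example (illustrative, hypothetical helper): a loop
// "for (i = 0; i != 100; ++i)" has a backedge-taken count of 99, so the
// method above reports a trip count of 100. The conversion mirrors its
// guard: counts that do not fit in 32 bits, or whose +1 wraps to 0, end up
// reported as unknown.
static unsigned exampleTripFromBECount(const APInt &BECount) {
  if (BECount.getActiveBits() > 32)
    return 0; // too large to report
  return (unsigned)BECount.getZExtValue() + 1; // wraps to 0 for 2^32 - 1
}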
/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
/// trip count of this loop as a normal unsigned value, if possible. This
/// means that the actual trip count is always a multiple of the returned
/// value (don't forget the trip count could very well be zero as well!).
///
/// Returns 1 if the trip count is unknown or not guaranteed to be a
/// multiple of a constant (which is also the case if the trip count is simply
/// constant; use getSmallConstantTripCount for that case). Will also return 1
/// if the trip count is very large (>= 2^32).
///
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned ScalarEvolution::
getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) {
const SCEV *ExitCount = getBackedgeTakenCount(L);
if (ExitCount == getCouldNotCompute())
return 1;
// Get the trip count from the BE count by adding 1.
const SCEV *TCMul = getAddExpr(ExitCount,
getConstant(ExitCount->getType(), 1));
// FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
// to factor simple cases.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
TCMul = Mul->getOperand(0);
const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
if (!MulC)
return 1;
ConstantInt *Result = MulC->getValue();
// Guard against huge trip counts (this requires checking
// for zero to handle the case where the trip count == -1 and the
// addition wraps).
if (!Result || Result->getValue().getActiveBits() > 32 ||
Result->getValue().getActiveBits() == 0)
return 1;
return (unsigned)Result->getZExtValue();
}
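// Usage sketch (hypothetical client): a vectorizer-style pass can use the
// trip multiple to decide whether a scalar epilogue is needed.
static bool exampleCanVectorizeByFour(ScalarEvolution &SE, Loop *L) {
  // The ExitingBlock argument is currently ignored by the implementation
  // above; the loop's sole exiting block, if any, is passed through.
  return SE.getSmallConstantTripMultiple(L, L->getExitingBlock()) % 4 == 0;
}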
/// getExitCount - Get the expression for the number of loop iterations for
/// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
/// return SCEVCouldNotCompute.
const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
}
/// getBackedgeTakenCount - If the specified loop has a predictable
/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
/// object. The backedge-taken count is the number of times the loop header
/// will be branched to from within the loop. This is one less than the
/// trip count of the loop, since it doesn't count the first iteration,
/// when the header is branched to from outside the loop.
///
/// Note that it is not valid to call this method on a loop without a
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
///
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getExact(this);
}
/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getMax(this);
}
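// Usage sketch (hypothetical helper): prefer the exact backedge-taken count
// and fall back to the conservative maximum computed above.
static const SCEV *exampleBestBackedgeBound(ScalarEvolution &SE,
                                            const Loop *L) {
  const SCEV *Exact = SE.getBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(Exact))
    return Exact;
  // The max count is never less than the actual backedge-taken count, so it
  // is always safe to use as an upper bound.
  return SE.getMaxBackedgeTakenCount(L);
}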
/// PushLoopPHIs - Push PHI nodes in the header of the given loop
/// onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
for (BasicBlock::iterator I = Header->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
Worklist.push_back(PN);
}
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
if (!Pair.second)
return Pair.first->second;
// ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
// into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
// must be cleared in this scope.
BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
if (Result.getExact(this) != getCouldNotCompute()) {
assert(isLoopInvariant(Result.getExact(this), L) &&
isLoopInvariant(Result.getMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
}
else if (Result.getMax(this) == getCouldNotCompute() &&
isa<PHINode>(L->getHeader()->begin())) {
// Only count loops that have phi nodes as not being computable.
++NumTripCountsNotComputed;
}
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
// information. This is similar to the code in forgetLoop, except that
// it handles SCEVUnknown PHI nodes specially.
if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, or it's a PHI that's in the process of being computed
// by createNodeForPHI. In the former case, additional loop trip
// count information isn't going to change anything. In the latter
// case, createNodeForPHI will perform the necessary updates on its
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
forgetMemoizedResults(Old);
ValueExprMap.erase(It);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
}
// Re-lookup the insert position, since the call to
// ComputeBackedgeTakenCount above could result in a
// recursive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
return BackedgeTakenCounts.find(L)->second = Result;
}
/// forgetLoop - This method should be called by the client when it has
/// changed a loop in a way that may affect ScalarEvolution's ability to
/// compute a trip count, or if the loop is deleted.
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
BackedgeTakenCounts.find(L);
if (BTCPos != BackedgeTakenCounts.end()) {
BTCPos->second.clear();
BackedgeTakenCounts.erase(BTCPos);
}
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
forgetMemoizedResults(It->second);
ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
// Forget all contained loops too, to avoid dangling entries in the
// ValuesAtScopes map.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
forgetLoop(*I);
}
/// forgetValue - This method should be called by the client when it has
/// changed a value in a way that may affect its value, or which may
/// disconnect it from a def-use chain linking it to a loop.
void ScalarEvolution::forgetValue(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
Worklist.push_back(I);
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
forgetMemoizedResults(It->second);
ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
}
/// getExact - Get the exact loop backedge taken count considering all loop
/// exits. A computable result can only be returned for loops with a single
/// exit. Returning the minimum taken count among all exits is incorrect
/// because one of the loop's exit limits may have been skipped. HowFarToZero
/// assumes that
/// the limit of each loop test is never skipped. This is a valid assumption as
/// long as the loop exits via that test. For precise results, it is the
/// caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
// If any exits were not computable, the loop is not computable.
if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
// We need exactly one computable exit.
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
const SCEV *BECount = 0;
for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
ENT != 0; ENT = ENT->getNextExit()) {
assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
if (!BECount)
BECount = ENT->ExactNotTaken;
else if (BECount != ENT->ExactNotTaken)
return SE->getCouldNotCompute();
}
assert(BECount && "Invalid not taken count for loop exit");
return BECount;
}
/// getExact - Get the exact not taken count for this loop exit.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
ENT != 0; ENT = ENT->getNextExit()) {
if (ENT->ExitingBlock == ExitingBlock)
return ENT->ExactNotTaken;
}
return SE->getCouldNotCompute();
}
/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
return Max ? Max : SE->getCouldNotCompute();
}
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
ScalarEvolution *SE) const {
if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
return true;
if (!ExitNotTaken.ExitingBlock)
return false;
for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
ENT != 0; ENT = ENT->getNextExit()) {
if (ENT->ExactNotTaken != SE->getCouldNotCompute()
&& SE->hasOperand(ENT->ExactNotTaken, S)) {
return true;
}
}
return false;
}
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
if (!Complete)
ExitNotTaken.setIncomplete();
unsigned NumExits = ExitCounts.size();
if (NumExits == 0) return;
ExitNotTaken.ExitingBlock = ExitCounts[0].first;
ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
if (NumExits == 1) return;
// Handle the rare case of multiple computable exits.
ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
ExitNotTakenInfo *PrevENT = &ExitNotTaken;
for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
PrevENT->setNextExit(ENT);
ENT->ExitingBlock = ExitCounts[i].first;
ENT->ExactNotTaken = ExitCounts[i].second;
}
}
/// clear - Invalidate this result and free the ExitNotTakenInfo array.
void ScalarEvolution::BackedgeTakenInfo::clear() {
ExitNotTaken.ExitingBlock = 0;
ExitNotTaken.ExactNotTaken = 0;
delete[] ExitNotTaken.getNextExit();
}
/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// Examine all exits and pick the most conservative values.
const SCEV *MaxBECount = getCouldNotCompute();
bool CouldComputeBECount = true;
SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
if (EL.Exact == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
if (MaxBECount == getCouldNotCompute())
MaxBECount = EL.Max;
else if (EL.Max != getCouldNotCompute()) {
// We cannot take the "min" MaxBECount, because non-unit stride loops may
// skip some loop tests. Taking the max over the exits is sufficiently
// conservative. TODO: We could do better taking into consideration
// that (1) the loop has unit stride (2) the last loop test is
// less-than/greater-than (3) any loop test is less-than/greater-than AND
// falls through some constant number of times less than the other tests.
MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
}
}
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
/// ComputeExitLimit - Compute the number of times the backedge of the specified
/// loop will execute if it exits via the specified block.
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
// Okay, we've chosen an exiting block. See what condition causes us to
// exit at this block.
//
// FIXME: we should be able to handle switch instructions (with a single exit)
BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (ExitBr == 0) return getCouldNotCompute();
assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
// At this point, we know we have a conditional branch that determines whether
// the loop is exited. However, we don't know if the branch is executed each
// time through the loop. If not, then the execution count of the branch will
// not be equal to the trip count of the loop.
//
// Currently we check for this by checking to see if the Exit branch goes to
// the loop header. If so, we know it will always execute the same number of
// times as the loop. We also handle the case where the exit block *is* the
// loop header. This is common for un-rotated loops.
//
// If both of those tests fail, walk up the unique predecessor chain to the
// header, stopping if there is an edge that doesn't exit the loop. If the
// header is reached, the execution count of the branch will be equal to the
// trip count of the loop.
//
// More extensive analysis could be done to handle more cases here.
//
if (ExitBr->getSuccessor(0) != L->getHeader() &&
ExitBr->getSuccessor(1) != L->getHeader() &&
ExitBr->getParent() != L->getHeader()) {
// The simple checks failed, try climbing the unique predecessor chain
// up to the header.
bool Ok = false;
for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
BasicBlock *Pred = BB->getUniquePredecessor();
if (!Pred)
return getCouldNotCompute();
TerminatorInst *PredTerm = Pred->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
BasicBlock *PredSucc = PredTerm->getSuccessor(i);
if (PredSucc == BB)
continue;
// If the predecessor has a successor that isn't BB and isn't
// outside the loop, assume the worst.
if (L->contains(PredSucc))
return getCouldNotCompute();
}
if (Pred == L->getHeader()) {
Ok = true;
break;
}
BB = Pred;
}
if (!Ok)
return getCouldNotCompute();
}
// Proceed to the next level to examine the exit condition expression.
return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
ExitBr->getSuccessor(0),
ExitBr->getSuccessor(1),
/*IsSubExpr=*/false);
}
/// ComputeExitLimitFromCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
///
/// @param IsSubExpr is true if ExitCond does not directly control the exit
/// branch. In this case, we cannot assume that the loop only exits when the
/// condition is true and cannot infer that failing to meet the condition prior
/// to integer wraparound results in undefined behavior.
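///
/// For example (a hypothetical loop), if the exit branch is controlled by
/// "i != n && i != m" and its true edge stays in the loop, either operand
/// becoming false exits the loop, so the exact count is the umin of the
/// two operands' counts whenever both are computable.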
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
bool IsSubExpr) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
IsSubExpr || EitherMayExit);
ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
IsSubExpr || EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
if (EL0.Exact == getCouldNotCompute() ||
EL1.Exact == getCouldNotCompute())
BECount = getCouldNotCompute();
else
BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
if (EL0.Max == getCouldNotCompute())
MaxBECount = EL1.Max;
else if (EL1.Max == getCouldNotCompute())
MaxBECount = EL0.Max;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
if (EL0.Max == EL1.Max)
MaxBECount = EL0.Max;
if (EL0.Exact == EL1.Exact)
BECount = EL0.Exact;
}
return ExitLimit(BECount, MaxBECount);
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
IsSubExpr || EitherMayExit);
ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
IsSubExpr || EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
if (EL0.Exact == getCouldNotCompute() ||
EL1.Exact == getCouldNotCompute())
BECount = getCouldNotCompute();
else
BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
if (EL0.Max == getCouldNotCompute())
MaxBECount = EL1.Max;
else if (EL1.Max == getCouldNotCompute())
MaxBECount = EL0.Max;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
if (EL0.Max == EL1.Max)
MaxBECount = EL0.Max;
if (EL0.Exact == EL1.Exact)
BECount = EL0.Exact;
}
return ExitLimit(BECount, MaxBECount);
}
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
// preserve the CFG and is temporarily leaving constant conditions
// in place.
if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
if (L->contains(FBB) == !CI->getZExtValue())
// The backedge is always taken.
return getCouldNotCompute();
else
// The backedge is never taken.
return getConstant(CI->getType(), 0);
}
// If it's not an integer or pointer comparison then compute it the hard way.
return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
/// ComputeExitLimitFromICmp - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
bool IsSubExpr) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
if (!L->contains(FBB))
Cond = ExitCond->getPredicate();
else
Cond = ExitCond->getInversePredicate();
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
ExitLimit ItCnt =
ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
// Try to evaluate any dependencies out of the loop.
LHS = getSCEVAtScope(LHS, L);
RHS = getSCEVAtScope(RHS, L);
// At this point, we would like to compute how many iterations of the
// loop the predicate will return true for these inputs.
if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
// If there is a loop-invariant, force it into the RHS.
std::swap(LHS, RHS);
Cond = ICmpInst::getSwappedPredicate(Cond);
}
// Simplify the operands before analyzing them.
(void)SimplifyICmpOperands(Cond, LHS, RHS);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
if (AddRec->getLoop() == L) {
// Form the constant range.
ConstantRange CompRange(
ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
bool IsSigned = Cond == ICmpInst::ICMP_SLT;
ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
bool IsSigned = Cond == ICmpInst::ICMP_SGT;
ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
default:
#if 0
dbgs() << "ComputeBackedgeTakenCount ";
if (ExitCond->getOperand(0)->getType()->isUnsigned())
dbgs() << "[unsigned] ";
dbgs() << *LHS << " "
<< Instruction::getOpcodeName(Instruction::ICmp)
<< " " << *RHS << "\n";
#endif
break;
}
return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
ScalarEvolution &SE) {
const SCEV *InVal = SE.getConstant(C);
const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
assert(isa<SCEVConstant>(Val) &&
"Evaluation of SCEV at constant didn't fold correctly?");
return cast<SCEVConstant>(Val)->getValue();
}
/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
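///
/// For example (hypothetical), for a loop like
///   for (i = 0; table[i] != 42; ++i)
/// over a constant global array table = {1, 2, 42, ...}, each candidate
/// iteration's load is folded through the GEP and compared against 42;
/// the count 2 is returned once table[2] == 42 falsifies the predicate.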
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeLoadConstantCompareExitLimit(
LoadInst *LI,
Constant *RHS,
const Loop *L,
ICmpInst::Predicate predicate) {
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
// TODO: Use SCEV instead of manually grubbing with GEPs.
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
if (!GEP) return getCouldNotCompute();
// Make sure that it is really a constant global we are gepping, with an
// initializer, and make sure the first IDX is really 0.
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
!cast<Constant>(GEP->getOperand(1))->isNullValue())
return getCouldNotCompute();
// Okay, we allow one non-constant index into the GEP instruction.
Value *VarIdx = 0;
std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
Indexes.push_back(CI);
} else if (!isa<ConstantInt>(GEP->getOperand(i))) {
if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
VarIdx = GEP->getOperand(i);
VarIdxNum = i-2;
Indexes.push_back(0);
}
// Loop-invariant loads may be a byproduct of loop optimization. Skip them.
if (!VarIdx)
return getCouldNotCompute();
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
const SCEV *Idx = getSCEV(VarIdx);
Idx = getSCEVAtScope(Idx, L);
// We can only recognize very limited forms of loop index expressions, in
// particular, only affine AddRec's like {C1,+,C2}.
const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
!isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
!isa<SCEVConstant>(IdxExpr->getOperand(1)))
return getCouldNotCompute();
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
ConstantInt *ItCst = ConstantInt::get(
cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
Indexes);
if (Result == 0) break; // Cannot compute!
// Evaluate the condition for this iteration.
Result = ConstantExpr::getICmp(predicate, Result, RHS);
if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
#if 0
dbgs() << "\n***\n*** Computed loop count " << *ItCst
<< "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
<< "***\n";
#endif
++NumArrayLenItCounts;
return getConstant(ItCst); // Found terminating iteration!
}
}
return getCouldNotCompute();
}
/// CanConstantFold - Return true if we can constant fold an instruction of the
/// specified type, assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
isa<LoadInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction())
return canConstantFoldCallTo(F);
return false;
}
/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
// An instruction outside of the loop can't be derived from a loop PHI.
if (!L->contains(I)) return false;
if (isa<PHINode>(I)) {
if (L->getHeader() == I->getParent())
return true;
else
// We don't currently keep track of the control flow needed to evaluate
// PHIs, so we cannot handle PHIs inside of loops.
return false;
}
// If we won't be able to constant fold this expression even if the operands
// are constants, bail early.
return CanConstantFold(I);
}
/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
DenseMap<Instruction *, PHINode *> &PHIMap) {
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
PHINode *PHI = 0;
for (Instruction::op_iterator OpI = UseInst->op_begin(),
OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
if (isa<Constant>(*OpI)) continue;
Instruction *OpInst = dyn_cast<Instruction>(*OpI);
if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
PHINode *P = dyn_cast<PHINode>(OpInst);
if (!P)
// If this operand is already visited, reuse the prior result.
// We may have P != PHI if this is the deepest point at which the
// inconsistent paths meet.
P = PHIMap.lookup(OpInst);
if (!P) {
// Recurse and memoize the results, whether a phi is found or not.
// This recursive call invalidates pointers into PHIMap.
P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
PHIMap[OpInst] = P;
}
if (P == 0) return 0; // Not evolving from PHI
if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs.
PHI = P;
}
// This is an expression evolving from a constant PHI!
return PHI;
}
/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from. We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI. If this expression does not fit with these
/// constraints, return null.
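///
/// For example (hypothetical), for V = (i * 2) + 1 where i is a header PHI
/// of L, this returns i; if V instead mixed two different header PHIs, it
/// would return null.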
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0 || !canConstantEvolve(I, L)) return 0;
if (PHINode *PN = dyn_cast<PHINode>(I)) {
return PN;
}
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
return getConstantEvolvingPHIOperands(I, L, PHIMap);
}
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal. If we can't fold this expression for some
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
const DataLayout *TD,
const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return 0;
if (Constant *C = Vals.lookup(I)) return C;
// An instruction inside the loop depends on a value outside the loop that we
// weren't given a mapping for, or a value such as a call inside the loop.
if (!canConstantEvolve(I, L)) return 0;
// An unmapped PHI can be due to a branch or another loop inside this loop,
// or due to this not being the initial iteration through a loop where we
// couldn't compute the evolution of this particular PHI last time.
if (isa<PHINode>(I)) return 0;
std::vector<Constant*> Operands(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
if (!Operand) {
Operands[i] = dyn_cast<Constant>(I->getOperand(i));
if (!Operands[i]) return 0;
continue;
}
Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
Vals[Operand] = C;
if (!C) return 0;
Operands[i] = C;
}
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], TD, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(Operands[0], TD);
}
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
TLI);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
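///
/// For example (hypothetical), for i = phi [0, preheader], [i + 3, latch]
/// with a backedge-taken count of 4, the symbolic execution below yields
/// the constant 12.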
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
DenseMap<PHINode*, Constant*>::const_iterator I =
ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
if (BEs.ugt(MaxBruteForceIterations))
return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
DenseMap<Instruction *, Constant *> CurrentIterVals;
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
// Since the loop is canonicalized, the PHI node must have two entries. One
// entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
PHINode *PHI = 0;
for (BasicBlock::iterator I = Header->begin();
(PHI = dyn_cast<PHINode>(I)); ++I) {
Constant *StartCST =
dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
if (StartCST == 0) continue;
CurrentIterVals[PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = 0;
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
TLI);
if (NextPHI == 0)
return 0; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
// Also evaluate the other PHI nodes. However, we don't get to stop if we
// cease to be able to evaluate one of them or if they stop evolving,
// because that doesn't necessarily prevent us from computing PN.
SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
for (DenseMap<Instruction *, Constant *>::const_iterator
I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
PHINode *PHI = dyn_cast<PHINode>(I->first);
if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
PHIsToCompute.push_back(std::make_pair(PHI, I->second));
}
// We use two distinct loops because EvaluateExpression may invalidate any
// iterators into CurrentIterVals.
for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
PHINode *PHI = I->first;
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
}
if (NextPHI != I->second)
StoppedEvolving = false;
}
// If all entries in CurrentIterVals == NextIterVals then we can stop
// iterating; the loop can't continue to change.
if (StoppedEvolving)
return RetVal = CurrentIterVals[PN];
CurrentIterVals.swap(NextIterVals);
}
}
/// ComputeExitCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until the exit
/// condition gets a value of ExitWhen (true or false). If we cannot
/// evaluate the trip count of the loop, return getCouldNotCompute().
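///
/// For example (hypothetical), for i = phi [0, preheader], [i + 2, latch]
/// with condition "i == 6" and a branch that exits when it is true, the
/// evaluation sees i = 0, 2, 4, 6 and returns the constant count 3.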
const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (PN == 0) return getCouldNotCompute();
// If the loop is canonicalized, the PHI will have exactly two entries.
// That's the only form we support here.
if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
DenseMap<Instruction *, Constant *> CurrentIterVals;
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
// One entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
PHINode *PHI = 0;
for (BasicBlock::iterator I = Header->begin();
(PHI = dyn_cast<PHINode>(I)); ++I) {
Constant *StartCST =
dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
if (StartCST == 0) continue;
CurrentIterVals[PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return getCouldNotCompute();
// Okay, we found a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
ConstantInt *CondVal =
dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
TD, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
if (CondVal->getValue() == uint64_t(ExitWhen)) {
++NumBruteForceTripCountsComputed;
return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
// Update all the PHI nodes for the next iteration.
DenseMap<Instruction *, Constant *> NextIterVals;
// Create a list of which PHIs we need to compute. We want to do this before
// calling EvaluateExpression on them because that may invalidate iterators
// into CurrentIterVals.
SmallVector<PHINode *, 8> PHIsToCompute;
for (DenseMap<Instruction *, Constant *>::const_iterator
I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
PHINode *PHI = dyn_cast<PHINode>(I->first);
if (!PHI || PHI->getParent() != Header) continue;
PHIsToCompute.push_back(PHI);
}
for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
E = PHIsToCompute.end(); I != E; ++I) {
PHINode *PHI = *I;
Constant *&NextPHI = NextIterVals[PHI];
if (NextPHI) continue; // Already computed!
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
}
CurrentIterVals.swap(NextIterVals);
}
// Too many iterations were needed to evaluate.
return getCouldNotCompute();
}
/// getSCEVAtScope - Return a SCEV expression for the specified value
/// at the specified scope in the program. The L value specifies the loop
/// nest in which to evaluate the expression; null denotes the top-level
/// scope.
///
/// This method can be used to compute the exit value for a variable defined
/// in a loop by querying what the value will hold in the parent loop.
///
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
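///
/// For example, for an AddRec {0,+,1}<L> queried at a scope outside L
/// whose backedge-taken count is computable, the result is the AddRec
/// evaluated at that count, i.e. its value in the final iteration.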
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if we've folded this expression at this loop before.
SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
for (unsigned u = 0; u < Values.size(); u++) {
if (Values[u].first == L)
return Values[u].second ? Values[u].second : V;
}
Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
for (unsigned u = Values2.size(); u > 0; u--) {
if (Values2[u - 1].first == L) {
Values2[u - 1].second = C;
break;
}
}
return C;
}
/// This builds up a Constant using the ConstantExpr interface. That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
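///
/// For example (hypothetical), the SCEV (8 + @g), where @g is the address
/// of a global, is built as a ConstantExpr GEP that adds 8 bytes to an
/// i8* bitcast of @g.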
static Constant *BuildConstantFromSCEV(const SCEV *V) {
switch (V->getSCEVType()) {
default: // TODO: smax, umax.
case scCouldNotCompute:
case scAddRecExpr:
break;
case scConstant:
return cast<SCEVConstant>(V)->getValue();
case scUnknown:
return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
case scSignExtend: {
const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
return ConstantExpr::getSExt(CastOp, SS->getType());
break;
}
case scZeroExtend: {
const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
return ConstantExpr::getZExt(CastOp, SZ->getType());
break;
}
case scTruncate: {
const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
return ConstantExpr::getTrunc(CastOp, ST->getType());
break;
}
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
unsigned AS = PTy->getAddressSpace();
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2) return 0;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
// statically compute a load that results from it anyway.
if (C2->getType()->isPointerTy())
return 0;
if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
C = ConstantExpr::getGetElementPtr(C, C2);
} else
C = ConstantExpr::getAdd(C, C2);
}
return C;
}
break;
}
case scMulExpr: {
const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
// Don't bother with pointers at all.
if (C->getType()->isPointerTy()) return 0;
for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
if (!C2 || C2->getType()->isPointerTy()) return 0;
C = ConstantExpr::getMul(C, C2);
}
return C;
}
break;
}
case scUDivExpr: {
const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
if (LHS->getType() == RHS->getType())
return ConstantExpr::getUDiv(LHS, RHS);
break;
}
}
return 0;
}
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
// If this instruction is evolved from a constant-evolving PHI, compute the
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
const Loop *LI = (*this->LI)[I->getParent()];
if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
if (PHINode *PN = dyn_cast<PHINode>(I))
if (PN->getParent() == LI->getHeader()) {
// Okay, there is no closed form solution for the PHI node. Check
// to see if the loop that contains it has a known backedge-taken
// count. If so, we may be able to force computation of the exit
// value.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
if (const SCEVConstant *BTCC =
dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
Constant *RV = getConstantEvolutionLoopExitValue(PN,
BTCC->getValue()->getValue(),
LI);
if (RV) return getSCEV(RV);
}
}
// Okay, this is an expression that we cannot symbolically evaluate
// into a SCEV. Check to see if it's possible to symbolically evaluate
// the arguments into constants, and if so, try to constant propagate the
// result. This is particularly useful for computing loop exit values.
if (CanConstantFold(I)) {
SmallVector<Constant *, 4> Operands;
bool MadeImprovement = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *Op = I->getOperand(i);
if (Constant *C = dyn_cast<Constant>(Op)) {
Operands.push_back(C);
continue;
}
// If any of the operands is a non-constant value of non-integer,
// non-pointer type, don't even try to analyze it with SCEV
// techniques.
if (!isSCEVable(Op->getType()))
return V;
const SCEV *OrigV = getSCEV(Op);
const SCEV *OpV = getSCEVAtScope(OrigV, L);
MadeImprovement |= OrigV != OpV;
Constant *C = BuildConstantFromSCEV(OpV);
if (!C) return V;
if (C->getType() != Op->getType())
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
Op->getType(),
false),
C, Op->getType());
Operands.push_back(C);
}
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
Operands[0], Operands[1], TD,
TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
} else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
Operands, TD, TLI);
if (!C) return V;
return getSCEV(C);
}
}
}
// This is some other type of SCEVUnknown, just return it.
return V;
}
if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
Comm->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i) {
OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
NewOps.push_back(OpAtScope);
}
if (isa<SCEVAddExpr>(Comm))
return getAddExpr(NewOps);
if (isa<SCEVMulExpr>(Comm))
return getMulExpr(NewOps);
if (isa<SCEVSMaxExpr>(Comm))
return getSMaxExpr(NewOps);
if (isa<SCEVUMaxExpr>(Comm))
return getUMaxExpr(NewOps);
llvm_unreachable("Unknown commutative SCEV type!");
}
}
// If we got here, all operands are loop invariant.
return Comm;
}
if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
if (LHS == Div->getLHS() && RHS == Div->getRHS())
return Div; // must be loop invariant
return getUDivExpr(LHS, RHS);
}
// If this is a loop recurrence for a loop that does not contain L, then we
// are dealing with the final value computed by the loop.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
// First, attempt to evaluate each operand.
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
if (OpAtScope == AddRec->getOperand(i))
continue;
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded expression.
SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
AddRec->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i)
NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
const SCEV *FoldedRec =
getAddRecExpr(NewOps, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
// The addrec may be folded to a nonrecurrence, for example, if the
// induction variable is multiplied by zero after constant folding. Go
// ahead and return the folded value.
if (!AddRec)
return FoldedRec;
break;
}
// If the scope is outside the addrec's loop, evaluate it by using the
// loop exit value of the addrec.
if (!AddRec->getLoop()->contains(L)) {
// To evaluate this recurrence, we need to know how many times the AddRec
// loop iterates. Compute this now.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
// Then, evaluate the AddRec.
return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
}
return AddRec;
}
if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getZeroExtendExpr(Op, Cast->getType());
}
if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getSignExtendExpr(Op, Cast->getType());
}
if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getTruncateExpr(Op, Cast->getType());
}
llvm_unreachable("Unknown SCEV type!");
}
/// getSCEVAtScope - This is a convenience function which does
/// getSCEVAtScope(getSCEV(V), L).
const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
return getSCEVAtScope(getSCEV(V), L);
}
/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
/// following equation:
///
/// A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
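///
/// For example (a hypothetical 4-bit instance): A = 6, B = 4, N = 16.
/// Then D = gcd(6, 16) = 2, B is divisible by D, the inverse of A/D = 3
/// modulo N/D = 8 is 3 (since 3 * 3 == 1 (mod 8)), and the minimum root is
/// X = 3 * (4/2) mod 8 = 6; indeed 6 * 6 == 36 == 4 (mod 16).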
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
assert(BW == B.getBitWidth() && "Bit widths must be the same.");
assert(A != 0 && "A must be non-zero.");
// 1. D = gcd(A, N)
//
// The gcd of A and N may have only one prime factor: 2. The number of
// trailing zeros in A is its multiplicity
uint32_t Mult2 = A.countTrailingZeros();
// D = 2^Mult2
// 2. Check if B is divisible by D.
//
// B is divisible by D if and only if the multiplicity of prime factor 2 for B
// is not less than the multiplicity of this prime factor for D.
if (B.countTrailingZeros() < Mult2)
return SE.getCouldNotCompute();
// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
// modulo (N / D).
//
// (N / D) may need BW+1 bits in its representation. Hence, we'll use this
// bit width during computations.
APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
APInt Mod(BW + 1, 0);
Mod.setBit(BW - Mult2); // Mod = N / D
APInt I = AD.multiplicativeInverse(Mod);
// 4. Compute the minimum unsigned root of the equation:
// I * (B / D) mod (N / D)
APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
// The result is guaranteed to be less than 2^BW so we may truncate it to BW
// bits.
return SE.getConstant(Result.trunc(BW));
}
/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
/// might be the same) or two SCEVCouldNotCompute objects.
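///
/// For example (hypothetical), the chrec {-4,+,1,+,2} has the value
/// i^2 - 4 at iteration i; the conversion below gives A = 1, B = 0,
/// C = -4, and the computed roots are +2 and -2.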
///
static std::pair<const SCEV *,const SCEV *>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
// We currently can only solve this if the coefficients are constants.
if (!LC || !MC || !NC) {
const SCEV *CNC = SE.getCouldNotCompute();
return std::make_pair(CNC, CNC);
}
uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
const APInt &L = LC->getValue()->getValue();
const APInt &M = MC->getValue()->getValue();
const APInt &N = NC->getValue()->getValue();
APInt Two(BitWidth, 2);
APInt Four(BitWidth, 4);
{
using namespace APIntOps;
const APInt& C = L;
// Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
// The B coefficient is M-N/2
APInt B(M);
B -= sdiv(N,Two);
// The A coefficient is N/2
APInt A(N.sdiv(Two));
// Compute the B^2-4ac term.
APInt SqrtTerm(B);
SqrtTerm *= B;
SqrtTerm -= Four * (A * C);
if (SqrtTerm.isNegative()) {
// The loop is provably infinite.
const SCEV *CNC = SE.getCouldNotCompute();
return std::make_pair(CNC, CNC);
}
// Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
// integer value or else APInt::sqrt() will assert.
APInt SqrtVal(SqrtTerm.sqrt());
// Compute the two solutions for the quadratic formula.
// The divisions must be performed as signed divisions.
APInt NegB(-B);
APInt TwoA(A << 1);
if (TwoA.isMinValue()) {
const SCEV *CNC = SE.getCouldNotCompute();
return std::make_pair(CNC, CNC);
}
LLVMContext &Context = SE.getContext();
ConstantInt *Solution1 =
ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
ConstantInt *Solution2 =
ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
return std::make_pair(SE.getConstant(Solution1),
SE.getConstant(Solution2));
} // end APIntOps namespace
}
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute. If not computable, return CouldNotCompute.
///
/// This is only used for loops with an "x != y" exit test. The exit condition
/// is now expressed as a single expression, V = x-y. So the exit test is
/// effectively V != 0. We know and take advantage of the fact that this
/// expression is only used in a compare-with-zero context.
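///
/// For example (hypothetical), for a loop for (i = 0; i != 10; ++i) the
/// exit expression is {-10,+,1}: Start = -10, Step = 1, and the unit-step
/// case below returns Distance = -Start = 10.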
ScalarEvolution::ExitLimit
ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
if (C->getValue()->isZero()) return C;
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
std::pair<const SCEV *,const SCEV *> Roots =
SolveQuadraticEquation(AddRec, *this);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
if (R1 && R2) {
#if 0
dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
<< " sol#2: " << *R2 << "\n";
#endif
// Pick the smallest positive root value.
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
R1->getValue(),
R2->getValue()))) {
if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// We can only use this value if the chrec ends up with an exact zero
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
return R1; // We found a quadratic root!
}
}
return getCouldNotCompute();
}
// Otherwise we can only handle this if it is affine.
if (!AddRec->isAffine())
return getCouldNotCompute();
// If this is an affine expression, the execution count of this branch is
// the minimum unsigned root of the following equation:
//
// Start + Step*N = 0 (mod 2^BW)
//
// equivalent to:
//
// Step*N = -Start (mod 2^BW)
//
// where BW is the common bit width of Start and Step.
// Get the initial value for the loop.
const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
// For now we handle only constant steps.
//
// TODO: Handle a nonconstant Step given AddRec<NUW>. If the
// AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
if (StepC == 0 || StepC->getValue()->equalsInt(0))
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
// N = -Start/Step (as unsigned)
// For negative steps (counting down to zero):
// N = Start/-Step
// First compute the unsigned distance from zero in the direction of Step.
bool CountDown = StepC->getValue()->getValue().isNegative();
const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
// Handle unitary steps, which cannot wraparound.
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
ConstantRange CR = getUnsignedRange(Start);
const SCEV *MaxBECount;
if (!CountDown && CR.getUnsignedMin().isMinValue())
// When counting up, the worst starting value is 1, not 0.
MaxBECount = CR.getUnsignedMax().isMinValue()
? getConstant(APInt::getMinValue(CR.getBitWidth()))
: getConstant(APInt::getMaxValue(CR.getBitWidth()));
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
return ExitLimit(Distance, MaxBECount);
}
// If the recurrence is known not to wraparound, unsigned divide computes the
// back edge count. (Ideally we would have an "isexact" bit for udiv). We know
// that the value will either become zero (and thus the loop terminates), that
// the loop will terminate through some other exit condition first, or that
// the loop has undefined behavior. This means we can't "miss" the exit
// value, even with nonunit stride.
//
// This is only valid for expressions that directly compute the loop exit. It
// is invalid for subexpressions in which the loop may exit through this
// branch even if this subexpression is false. In that case, the trip count
// computed by this udiv could be smaller than the number of well-defined
// iterations.
if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
-StartC->getValue()->getValue(),
*this);
return getCouldNotCompute();
}
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
ScalarEvolution::ExitLimit
ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
// If the value is a constant, check to see if it is known to be non-zero
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
if (!C->getValue()->isNullValue())
return getConstant(C->getType(), 0);
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
// We could implement others, but I really doubt anyone writes loops like
// this, and if they did, they would already be constant folded.
return getCouldNotCompute();
}
/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
/// (which may not be an immediate predecessor) which has exactly one
/// successor from which BB is reachable, as a (predecessor, successor)
/// pair, or a pair of null pointers if no such block is found.
///
std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
// If the block has a unique predecessor, then there is no path from the
// predecessor to the block that does not go through the direct edge
// from the predecessor to the block.
if (BasicBlock *Pred = BB->getSinglePredecessor())
return std::make_pair(Pred, BB);
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
if (Loop *L = LI->getLoopFor(BB))
return std::make_pair(L->getLoopPredecessor(), L->getHeader());
return std::pair<BasicBlock *, BasicBlock *>();
}
/// HasSameValue - SCEV structural equivalence is usually sufficient for
/// testing whether two expressions are equal, however for the purposes of
/// looking for a condition guarding a loop, it can be useful to be a little
/// more general, since a front-end may have replicated the controlling
/// expression.
///
static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
// Otherwise, if they're both SCEVUnknown, it's possible that they hold
// two different instructions with the same value. Check for this case.
if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
return true;
// Otherwise assume they may have a different value.
return false;
}
/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
/// predicate Pred. Return true iff any changes were made.
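///
/// For example, "X u>= 1" canonicalizes to "X != 0" and "X u< 1" to
/// "X == 0".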
///
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
bool Changed = false;
// If we hit the max recursion limit bail out.
if (Depth >= 3)
return false;
// Canonicalize a constant to the right side.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
// Check for both operands constant.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (ConstantExpr::getICmp(Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
goto trivially_false;
else
goto trivially_true;
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
}
// If we're comparing an addrec with a value which is loop-invariant in the
// addrec's loop, put the addrec on the left. Also make a dominance check,
// as both operands could be addrecs loop-invariant in each other's loop.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
const Loop *L = AR->getLoop();
if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
}
}
// If there's a constant operand, canonicalize comparisons with boundary
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
const APInt &RA = RC->getValue()->getValue();
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
// Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
if (!RA)
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
ME->getOperand(0)->isAllOnesValue()) {
RHS = AE->getOperand(1);
LHS = ME->getOperand(1);
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
if ((RA - 1).isMinValue()) {
Pred = ICmpInst::ICMP_NE;
RHS = getConstant(RA - 1);
Changed = true;
break;
}
if (RA.isMaxValue()) {
Pred = ICmpInst::ICMP_EQ;
Changed = true;
break;
}
if (RA.isMinValue()) goto trivially_true;
Pred = ICmpInst::ICMP_UGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
case ICmpInst::ICMP_ULE:
if ((RA + 1).isMaxValue()) {
Pred = ICmpInst::ICMP_NE;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
if (RA.isMinValue()) {
Pred = ICmpInst::ICMP_EQ;
Changed = true;
break;
}
if (RA.isMaxValue()) goto trivially_true;
Pred = ICmpInst::ICMP_ULT;
RHS = getConstant(RA + 1);
Changed = true;
break;
case ICmpInst::ICMP_SGE:
if ((RA - 1).isMinSignedValue()) {
Pred = ICmpInst::ICMP_NE;
RHS = getConstant(RA - 1);
Changed = true;
break;
}
if (RA.isMaxSignedValue()) {
Pred = ICmpInst::ICMP_EQ;
Changed = true;
break;
}
if (RA.isMinSignedValue()) goto trivially_true;
Pred = ICmpInst::ICMP_SGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
case ICmpInst::ICMP_SLE:
if ((RA + 1).isMaxSignedValue()) {
Pred = ICmpInst::ICMP_NE;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
if (RA.isMinSignedValue()) {
Pred = ICmpInst::ICMP_EQ;
Changed = true;
break;
}
if (RA.isMaxSignedValue()) goto trivially_true;
Pred = ICmpInst::ICMP_SLT;
RHS = getConstant(RA + 1);
Changed = true;
break;
case ICmpInst::ICMP_UGT:
if (RA.isMinValue()) {
Pred = ICmpInst::ICMP_NE;
Changed = true;
break;
}
if ((RA + 1).isMaxValue()) {
Pred = ICmpInst::ICMP_EQ;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
if (RA.isMaxValue()) goto trivially_false;
break;
case ICmpInst::ICMP_ULT:
if (RA.isMaxValue()) {
Pred = ICmpInst::ICMP_NE;
Changed = true;
break;
}
if ((RA - 1).isMinValue()) {
Pred = ICmpInst::ICMP_EQ;
RHS = getConstant(RA - 1);
Changed = true;
break;
}
if (RA.isMinValue()) goto trivially_false;
break;
case ICmpInst::ICMP_SGT:
if (RA.isMinSignedValue()) {
Pred = ICmpInst::ICMP_NE;
Changed = true;
break;
}
if ((RA + 1).isMaxSignedValue()) {
Pred = ICmpInst::ICMP_EQ;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
if (RA.isMaxSignedValue()) goto trivially_false;
break;
case ICmpInst::ICMP_SLT:
if (RA.isMaxSignedValue()) {
Pred = ICmpInst::ICMP_NE;
Changed = true;
break;
}
if ((RA - 1).isMinSignedValue()) {
Pred = ICmpInst::ICMP_EQ;
RHS = getConstant(RA - 1);
Changed = true;
break;
}
if (RA.isMinSignedValue()) goto trivially_false;
break;
}
}
// Check for obvious equality.
if (HasSameValue(LHS, RHS)) {
if (ICmpInst::isTrueWhenEqual(Pred))
goto trivially_true;
if (ICmpInst::isFalseWhenEqual(Pred))
goto trivially_false;
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
// adding or subtracting 1 from one of the operands.
switch (Pred) {
case ICmpInst::ICMP_SLE:
if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
} else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
}
break;
case ICmpInst::ICMP_SGE:
if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
} else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
}
break;
case ICmpInst::ICMP_ULE:
if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
} else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
} else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
}
break;
default:
break;
}
// TODO: More simplifications are possible here.
// Recursively simplify until we either hit a recursion limit or nothing
// changes.
if (Changed)
return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
return Changed;
trivially_true:
// Return 0 == 0.
LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
Pred = ICmpInst::ICMP_EQ;
return true;
trivially_false:
// Return 0 != 0.
LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
Pred = ICmpInst::ICMP_NE;
return true;
}
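// Illustrative sketch (not part of this revision): the unsigned boundary-case
// canonicalization above, restated on plain 8-bit values instead of SCEVs.
// "x u<= 250" becomes the strict "x u< 251", while "x u<= 255" is trivially
// true. All names below are hypothetical.
#include <cassert>
#include <cstdint>

enum class DemoPred { ULT, TriviallyTrue };

static DemoPred demoCanonicalizeULE(uint8_t &RHS) {
  if (RHS == UINT8_MAX)
    return DemoPred::TriviallyTrue; // x u<= max holds for every x.
  ++RHS;                            // Fold x u<= C into x u< (C + 1).
  return DemoPred::ULT;
}

static void demoSimplify() {
  uint8_t C = 250;
  assert(demoCanonicalizeULE(C) == DemoPred::ULT && C == 251);
  uint8_t M = UINT8_MAX;
  assert(demoCanonicalizeULE(M) == DemoPred::TriviallyTrue);
}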
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
return getSignedRange(S).getSignedMax().isNegative();
}
bool ScalarEvolution::isKnownPositive(const SCEV *S) {
return getSignedRange(S).getSignedMin().isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
return !getSignedRange(S).getSignedMin().isNegative();
}
bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
return !getSignedRange(S).getSignedMax().isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
return isKnownNegative(S) || isKnownPositive(S);
}
bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Canonicalize the inputs first.
(void)SimplifyICmpOperands(Pred, LHS, RHS);
// If LHS or RHS is an addrec, check to see if the condition is true in
// every iteration of the loop.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (isLoopEntryGuardedByCond(
AR->getLoop(), Pred, AR->getStart(), RHS) &&
isLoopBackedgeGuardedByCond(
AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
return true;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
if (isLoopEntryGuardedByCond(
AR->getLoop(), Pred, LHS, AR->getStart()) &&
isLoopBackedgeGuardedByCond(
AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
return true;
// Otherwise see what can be done with known constant ranges.
return isKnownPredicateWithRanges(Pred, LHS, RHS);
}
bool
ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
if (HasSameValue(LHS, RHS))
return ICmpInst::isTrueWhenEqual(Pred);
// This code is split out from isKnownPredicate because it is called from
// within isLoopEntryGuardedByCond.
switch (Pred) {
default:
llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_SGT:
Pred = ICmpInst::ICMP_SLT;
std::swap(LHS, RHS);
case ICmpInst::ICMP_SLT: {
ConstantRange LHSRange = getSignedRange(LHS);
ConstantRange RHSRange = getSignedRange(RHS);
if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
return true;
if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
return false;
break;
}
case ICmpInst::ICMP_SGE:
Pred = ICmpInst::ICMP_SLE;
std::swap(LHS, RHS);
case ICmpInst::ICMP_SLE: {
ConstantRange LHSRange = getSignedRange(LHS);
ConstantRange RHSRange = getSignedRange(RHS);
if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
return true;
if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
return false;
break;
}
case ICmpInst::ICMP_UGT:
Pred = ICmpInst::ICMP_ULT;
std::swap(LHS, RHS);
case ICmpInst::ICMP_ULT: {
ConstantRange LHSRange = getUnsignedRange(LHS);
ConstantRange RHSRange = getUnsignedRange(RHS);
if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
return true;
if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
return false;
break;
}
case ICmpInst::ICMP_UGE:
Pred = ICmpInst::ICMP_ULE;
std::swap(LHS, RHS);
case ICmpInst::ICMP_ULE: {
ConstantRange LHSRange = getUnsignedRange(LHS);
ConstantRange RHSRange = getUnsignedRange(RHS);
if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
return true;
if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
return false;
break;
}
case ICmpInst::ICMP_NE: {
if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
return true;
if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
return true;
const SCEV *Diff = getMinusSCEV(LHS, RHS);
if (isKnownNonZero(Diff))
return true;
break;
}
case ICmpInst::ICMP_EQ:
// The check at the top of the function catches the case where
// the values are known to be equal.
break;
}
return false;
}
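// Illustrative sketch (not part of this revision): the range-based proof for
// SLT above, restated with inclusive [Min, Max] intervals of plain int64_t
// standing in for llvm::ConstantRange. The names are hypothetical.
#include <cstdint>

struct DemoRange { int64_t Min, Max; }; // inclusive signed range

// Returns 1 if LHS < RHS is known true, 0 if known false, -1 if unknown.
static int demoKnownSLT(DemoRange L, DemoRange R) {
  if (L.Max < R.Min)  // every LHS value is below every RHS value
    return 1;
  if (L.Min >= R.Max) // mirrors the conservative sge() test above
    return 0;
  return -1;          // the ranges overlap; nothing is provable
}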
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS. This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return true;
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
BranchInst *LoopContinuePredicate =
dyn_cast<BranchInst>(Latch->getTerminator());
if (!LoopContinuePredicate ||
LoopContinuePredicate->isUnconditional())
return false;
return isImpliedCond(Pred, LHS, RHS,
LoopContinuePredicate->getCondition(),
LoopContinuePredicate->getSuccessor(0) != L->getHeader());
}
/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
/// by a conditional between LHS and RHS. This is used to help avoid max
/// expressions in loop trip counts, and to eliminate casts.
bool
ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return false;
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
for (std::pair<BasicBlock *, BasicBlock *>
Pair(L->getLoopPredecessor(), L->getHeader());
Pair.first;
Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate ||
LoopEntryPredicate->isUnconditional())
continue;
if (isImpliedCond(Pred, LHS, RHS,
LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
return false;
}
/// RAII wrapper to prevent recursive application of isImpliedCond.
/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
/// currently evaluating isImpliedCond.
struct MarkPendingLoopPredicate {
Value *Cond;
DenseSet<Value*> &LoopPreds;
bool Pending;
MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
: Cond(C), LoopPreds(LP) {
Pending = !LoopPreds.insert(Cond).second;
}
~MarkPendingLoopPredicate() {
if (!Pending)
LoopPreds.erase(Cond);
}
};
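// Illustrative sketch (not part of this revision): the same RAII guard
// pattern with std::set standing in for DenseSet. insert() reports whether
// the condition was already pending, and the destructor erases the entry
// only when this frame was the one that added it.
#include <set>

struct DemoPendingGuard {
  std::set<const void *> &Pending;
  const void *Cond;
  bool AlreadyPending;
  DemoPendingGuard(const void *C, std::set<const void *> &P)
      : Pending(P), Cond(C), AlreadyPending(!P.insert(C).second) {}
  ~DemoPendingGuard() {
    if (!AlreadyPending)
      Pending.erase(Cond); // leave entries added by outer frames alone
  }
};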
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
Value *FoundCondValue,
bool Inverse) {
MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
if (Mark.Pending)
return false;
// Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
}
}
ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
// Bail if the ICmp's operands' types are wider than the needed type
// before attempting to call getSCEV on them. This avoids infinite
// recursion, since the analysis of widening casts can require loop
// exit condition information for overflow checking, which would
// lead back here.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(ICI->getOperand(0)->getType()))
return false;
// We have found a conditional branch that dominates the loop or controls the
// loop latch. Check to see if it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
if (Inverse)
FoundPred = ICI->getInversePredicate();
else
FoundPred = ICI->getPredicate();
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
// Balance the types. The case where FoundLHS' type is wider than
// LHS' type is checked for above.
if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
- if (CmpInst::isSigned(Pred)) {
+ if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
} else {
FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
}
}
// Canonicalize the query to match the way instcombine will have
// canonicalized the comparison.
if (SimplifyICmpOperands(Pred, LHS, RHS))
if (LHS == RHS)
return CmpInst::isTrueWhenEqual(Pred);
if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
if (FoundLHS == FoundRHS)
return CmpInst::isFalseWhenEqual(FoundPred);
// Check to see if we can make the LHS or RHS match.
if (LHS == FoundRHS || RHS == FoundLHS) {
if (isa<SCEVConstant>(RHS)) {
std::swap(FoundLHS, FoundRHS);
FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
} else {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
}
// Check whether the found predicate is the same as the desired predicate.
if (FoundPred == Pred)
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
// Check whether swapping the found predicate makes it the same as the
// desired predicate.
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
if (isa<SCEVConstant>(RHS))
return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
else
return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
RHS, LHS, FoundLHS, FoundRHS);
}
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
if (Pred == ICmpInst::ICMP_NE)
if (!ICmpInst::isTrueWhenEqual(FoundPred))
if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
return true;
// Otherwise assume the worst.
return false;
}
/// isImpliedCondOperands - Test whether the condition described by Pred,
/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
/// and FoundRHS is true.
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
isImpliedCondOperandsHelper(Pred, LHS, RHS,
getNotSCEV(FoundRHS),
getNotSCEV(FoundLHS));
}
/// isImpliedCondOperandsHelper - Test whether the condition described by
/// Pred, LHS, and RHS is true whenever the condition described by Pred,
/// FoundLHS, and FoundRHS is true.
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
return false;
}
// Verify whether a linear IV with a positive stride can overflow when used
// in a less-than comparison, given the invariant term of the comparison, the
// stride, and knowledge of the NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getConstant(Stride->getType(), 1);
if (IsSigned) {
APInt MaxRHS = getSignedRange(RHS).getSignedMax();
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
.getSignedMax();
// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
}
APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
APInt MaxValue = APInt::getMaxValue(BitWidth);
APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
.getUnsignedMax();
// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
}
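// Illustrative sketch (not part of this revision): the unsigned test above
// with concrete 8-bit numbers. With a stride of 10, MaxStrideMinusOne is 9
// and the largest safe RHS bound is 255 - 9 = 246; an RHS that can reach
// 250 may step the IV past 255 before the exit test fires.
#include <cassert>
#include <cstdint>

static bool demoMayOverflowOnULT(uint8_t MaxRHS, uint8_t MaxStride) {
  const uint8_t MaxValue = UINT8_MAX;
  const uint8_t MaxStrideMinusOne = MaxStride - 1;
  // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
  return MaxRHS > static_cast<uint8_t>(MaxValue - MaxStrideMinusOne);
}

static void demoOverflowCheck() {
  assert(!demoMayOverflowOnULT(246, 10));
  assert(demoMayOverflowOnULT(250, 10));
}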
// Verify whether a linear IV with a negative stride can overflow when used
// in a greater-than comparison, given the invariant term of the comparison,
// the stride, and knowledge of the NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getConstant(Stride->getType(), 1);
if (IsSigned) {
APInt MinRHS = getSignedRange(RHS).getSignedMin();
APInt MinValue = APInt::getSignedMinValue(BitWidth);
APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
.getSignedMax();
// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
}
APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
APInt MinValue = APInt::getMinValue(BitWidth);
APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
.getUnsignedMax();
// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
}
// Compute the backedge taken count knowing the interval difference, the
// stride and presence of the equality in the comparison.
const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
bool Equality) {
const SCEV *One = getConstant(Step->getType(), 1);
Delta = Equality ? getAddExpr(Delta, Step)
: getAddExpr(Delta, getMinusSCEV(Step, One));
return getUDivExpr(Delta, Step);
}
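// Illustrative sketch (not part of this revision): computeBECount on plain
// numbers. With an exclusive bound (Equality == false) the count is a
// ceiling division: covering a distance Delta = 8 in steps of 2 takes
// (8 + (2 - 1)) / 2 = 4 steps; with Equality == true the bound itself still
// iterates, giving (8 + 2) / 2 = 5.
#include <cassert>
#include <cstdint>

static uint64_t demoBECount(uint64_t Delta, uint64_t Step, bool Equality) {
  Delta += Equality ? Step : Step - 1; // matches the Delta adjustment above
  return Delta / Step;
}

static void demoCount() {
  assert(demoBECount(8, 2, false) == 4); // ceil(8 / 2)
  assert(demoBECount(8, 2, true) == 5);  // one more when equality exits too
}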
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
///
/// @param IsSubExpr is true when the LHS < RHS condition does not directly
/// control the branch. In this case, we can only compute an iteration count for
/// a subexpression that cannot overflow before evaluating true.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool IsSubExpr) {
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
bool NoWrap = !IsSubExpr &&
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
const SCEV *Stride = IV->getStepRecurrence(*this);
// Avoid negative or zero stride values
if (!isKnownPositive(Stride))
return getCouldNotCompute();
// Avoid proven overflow cases: this will ensure that the backedge-taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
// exploit NoWrapFlags, allowing us to optimize in the presence of undefined
// behavior, as in C.
if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
End = IsSigned ? getSMaxExpr(RHS, Start)
: getUMaxExpr(RHS, Start);
const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
: getUnsignedRange(Start).getUnsignedMin();
APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
: getUnsignedRange(Stride).getUnsignedMin();
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
: APInt::getMaxValue(BitWidth) - (MinStride - 1);
// Although End can be a MAX expression we estimate MaxEnd considering only
// the case End = RHS. This is safe because in the other case (End - Start)
// is zero, leading to a zero maximum backedge taken count.
APInt MaxEnd =
IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
: APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
const SCEV *MaxBECount = getCouldNotCompute();
if (isa<SCEVConstant>(BECount))
MaxBECount = BECount;
else
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
return ExitLimit(BECount, MaxBECount);
}
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool IsSubExpr) {
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
bool NoWrap = !IsSubExpr &&
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
// Avoid negative or zero stride values
if (!isKnownPositive(Stride))
return getCouldNotCompute();
// Avoid proven overflow cases: this will ensure that the backedge-taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
// exploit NoWrapFlags, allowing us to optimize in the presence of undefined
// behavior, as in C.
if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
End = IsSigned ? getSMinExpr(RHS, Start)
: getUMinExpr(RHS, Start);
const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
: getUnsignedRange(Start).getUnsignedMax();
APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
: getUnsignedRange(Stride).getUnsignedMin();
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
: APInt::getMinValue(BitWidth) + (MinStride - 1);
// Although End can be a MIN expression we estimate MinEnd considering only
// the case End = RHS. This is safe because in the other case (Start - End)
// is zero, leading to a zero maximum backedge taken count.
APInt MinEnd =
IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
: APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
const SCEV *MaxBECount = getCouldNotCompute();
if (isa<SCEVConstant>(BECount))
MaxBECount = BECount;
else
MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
/// produce values in the specified constant range. Another way of looking at
/// this is that it returns the first iteration number where the value is not
/// in the range, thus computing the exit count. If the iteration count can't
/// be computed, an instance of SCEVCouldNotCompute is returned.
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
ScalarEvolution &SE) const {
if (Range.isFullSet()) // Infinite loop.
return SE.getCouldNotCompute();
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
Operands[0] = SE.getConstant(SC->getType(), 0);
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
getNoWrapFlags(FlagNW));
if (const SCEVAddRecExpr *ShiftedAddRec =
dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
Range.subtract(SC->getValue()->getValue()), SE);
// This is strange and shouldn't happen.
return SE.getCouldNotCompute();
}
// The only time we can solve this is when we have all constant indices.
// Otherwise, we cannot determine the overflow conditions.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (!isa<SCEVConstant>(getOperand(i)))
return SE.getCouldNotCompute();
// Okay at this point we know that all elements of the chrec are constants and
// that the start element is zero.
// First check to see if the range contains zero. If not, the first
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
return SE.getConstant(getType(), 0);
if (isAffine()) {
// If this is an affine expression then we have this situation:
// Solve {0,+,A} in Range === Ax in Range
// We know that zero is in the range. If A is positive then we know that
// the upper value of the range must be the first possible exit value.
// If A is negative then the lower of the range is the last possible loop
// value. Also note that we already checked for a full range.
APInt One(BitWidth,1);
APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
// The exit value should be (End+A)/A.
APInt ExitVal = (End + A).udiv(A);
ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
// Evaluate at the exit value. If we really did fall out of the valid
// range, then we computed our trip count, otherwise wrap around or other
// things must have happened.
ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
if (Range.contains(Val->getValue()))
return SE.getCouldNotCompute(); // Something strange happened
// Ensure that the previous value is in the range. This is a sanity check.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
} else if (isQuadratic()) {
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
// quadratic equation to solve it. To do this, we must frame our problem in
// terms of figuring out when zero is crossed, instead of when
// Range.getUpper() is crossed.
SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
// getNoWrapFlags(FlagNW)
FlagAnyWrap);
// Next, solve the constructed addrec
std::pair<const SCEV *,const SCEV *> Roots =
SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
if (R1) {
// Pick the smallest positive root value.
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
R1->getValue(), R2->getValue()))) {
if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// Make sure the root is not off by one. The returned iteration should
// not be in the range, but the previous one should be. When solving
// for "X*X < 5", for example, we should not return a root of 2.
ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
R1->getValue(),
SE);
if (Range.contains(R1Val->getValue())) {
// The next iteration must be out of the range...
ConstantInt *NextVal =
ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (!Range.contains(R1Val->getValue()))
return SE.getConstant(NextVal);
return SE.getCouldNotCompute(); // Something strange happened
}
// If R1 was not in the range, then it is a good return value. Make
// sure that R1-1 WAS in the range though, just in case.
ConstantInt *NextVal =
ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (Range.contains(R1Val->getValue()))
return R1;
return SE.getCouldNotCompute(); // Something strange happened
}
}
}
return SE.getCouldNotCompute();
}
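// Illustrative sketch (not part of this revision): the affine exit-value
// computation above with concrete numbers. For the chrec {0,+,3} and the
// range [0, 10), A is positive, so End = Range.getUpper() - 1 = 9 and
// ExitVal = (End + A) / A = (9 + 3) / 3 = 4: 3 * 4 = 12 is the first value
// outside the range, while 3 * 3 = 9 is still inside it.
#include <cassert>
#include <cstdint>

static void demoAffineExit() {
  const uint64_t A = 3, Upper = 10;
  const uint64_t End = Upper - 1;         // last value inside [0, Upper)
  const uint64_t ExitVal = (End + A) / A; // the (End + A) udiv A above
  assert(ExitVal == 4);
  assert(A * ExitVal >= Upper && A * (ExitVal - 1) < Upper);
}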
static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getValue()->getValue().abs();
APInt B = C2->getValue()->getValue().abs();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
if (ABW > BBW)
B = B.zext(ABW);
else if (ABW < BBW)
A = A.zext(BBW);
return APIntOps::GreatestCommonDivisor(A, B);
}
static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getValue()->getValue();
APInt B = C2->getValue()->getValue();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
if (ABW > BBW)
B = B.sext(ABW);
else if (ABW < BBW)
A = A.sext(BBW);
return APIntOps::srem(A, B);
}
static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getValue()->getValue();
APInt B = C2->getValue()->getValue();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
if (ABW > BBW)
B = B.sext(ABW);
else if (ABW < BBW)
A = A.sext(BBW);
return APIntOps::sdiv(A, B);
}
namespace {
struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
public:
// Pattern match Step into Start. When Step is a multiply expression, find
// the largest subexpression of Step that appears in Start. When Start is an
// add expression, try to match Step in the subexpressions of Start; the
// non-matching subexpressions are returned in Remainder.
static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
const SCEV *Step, const SCEV **Remainder) {
assert(Remainder && "Remainder should not be NULL");
SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
const SCEV *Res = R.visit(Start);
*Remainder = R.Remainder;
return Res;
}
SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
: SE(S), GCD(G), Remainder(R) {
Zero = SE.getConstant(GCD->getType(), 0);
One = SE.getConstant(GCD->getType(), 1);
}
const SCEV *visitConstant(const SCEVConstant *Constant) {
if (GCD == Constant || Constant == Zero)
return GCD;
if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
if (Res != One)
return Res;
Remainder = SE.getConstant(srem(Constant, CGCD));
Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
Res = SE.getConstant(gcd(Constant, CGCD));
return Res;
}
// When GCD is not a constant, it could be that the GCD is an Add, Mul,
// AddRec, etc., in which case we want to find out how many times the
// Constant divides the GCD: we then return that as the new GCD.
const SCEV *Rem = Zero;
const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
if (Res == One || Rem != Zero) {
Remainder = Constant;
return One;
}
assert(isa<SCEVConstant>(Res) && "Res should be a constant");
Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
return Res;
}
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
if (GCD == Expr)
return GCD;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
const SCEV *Rem = Zero;
const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
// FIXME: There may be ambiguous situations: for instance,
// GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
// The order in which the AddExpr is traversed computes a different GCD
// and Remainder.
if (Res != One)
GCD = Res;
if (Rem != Zero)
Remainder = SE.getAddExpr(Remainder, Rem);
}
return GCD;
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
if (GCD == Expr)
return GCD;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
if (Expr->getOperand(i) == GCD)
return GCD;
}
// If we have not returned yet, it means that GCD is not part of Expr.
const SCEV *PartialGCD = One;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
const SCEV *Rem = Zero;
const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
if (Rem != Zero)
// GCD does not divide Expr->getOperand(i).
continue;
if (Res == GCD)
return GCD;
PartialGCD = SE.getMulExpr(PartialGCD, Res);
if (PartialGCD == GCD)
return GCD;
}
if (PartialGCD != One)
return PartialGCD;
Remainder = Expr;
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
if (!Mul)
return PartialGCD;
// When the GCD is a multiply expression, try to decompose it:
// this occurs when Step does not divide the Start expression
// as in: {(-4 + (3 * %m)),+,(2 * %m)}
for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
const SCEV *Rem = Zero;
const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
if (Rem == Zero) {
Remainder = Rem;
return Res;
}
}
return PartialGCD;
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
if (GCD == Expr)
return GCD;
if (!Expr->isAffine()) {
Remainder = Expr;
return GCD;
}
const SCEV *Rem = Zero;
const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
if (Rem != Zero)
Remainder = SE.getAddExpr(Remainder, Rem);
Rem = Zero;
Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
if (Rem != Zero) {
Remainder = Expr;
return GCD;
}
return Res;
}
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (GCD != Expr)
Remainder = Expr;
return GCD;
}
const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
return One;
}
private:
ScalarEvolution &SE;
const SCEV *GCD, *Remainder, *Zero, *One;
};
struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
public:
// Remove from Start all multiples of Step.
static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
const SCEV *Step) {
SCEVDivision D(SE, Step);
const SCEV *Rem = D.Zero;
(void)Rem;
// The division is guaranteed to succeed: Step should divide Start with no
// remainder.
assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
"Step should divide Start with no remainder.");
return D.visit(Start);
}
SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
Zero = SE.getConstant(GCD->getType(), 0);
One = SE.getConstant(GCD->getType(), 1);
}
const SCEV *visitConstant(const SCEVConstant *Constant) {
if (GCD == Constant)
return One;
if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
return SE.getConstant(sdiv(Constant, CGCD));
return Constant;
}
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
if (GCD == Expr)
return One;
SmallVector<const SCEV *, 2> Operands;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
if (Operands.size() == 1)
return Operands[0];
return SE.getAddExpr(Operands);
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
if (GCD == Expr)
return One;
bool FoundGCDTerm = false;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
if (Expr->getOperand(i) == GCD)
FoundGCDTerm = true;
SmallVector<const SCEV *, 2> Operands;
if (FoundGCDTerm) {
FoundGCDTerm = false;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
if (FoundGCDTerm)
Operands.push_back(Expr->getOperand(i));
else if (Expr->getOperand(i) == GCD)
FoundGCDTerm = true;
else
Operands.push_back(Expr->getOperand(i));
}
} else {
FoundGCDTerm = false;
const SCEV *PartialGCD = One;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
if (PartialGCD == GCD) {
Operands.push_back(Expr->getOperand(i));
continue;
}
const SCEV *Rem = Zero;
const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
if (Rem == Zero) {
PartialGCD = SE.getMulExpr(PartialGCD, Res);
Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
} else {
Operands.push_back(Expr->getOperand(i));
}
}
}
if (Operands.size() == 1)
return Operands[0];
return SE.getMulExpr(Operands);
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
if (GCD == Expr)
return One;
assert(Expr->isAffine() && "Expr should be affine");
const SCEV *Start = divide(SE, Expr->getStart(), GCD);
const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
Expr->getNoWrapFlags());
}
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (GCD == Expr)
return One;
return Expr;
}
const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
return Expr;
}
private:
ScalarEvolution &SE;
const SCEV *GCD, *Zero, *One;
};
}
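// Illustrative sketch (not part of this revision): the constant fallback in
// SCEVGCD::visitConstant, on plain integers. For Constant = 23 against a
// constant GCD of 8, gcd(23, 8) == 1, so the remainder 23 % 8 = 7 is peeled
// off first; gcd(23 - 7, 8) = 8 then yields the decomposition 23 = 8*2 + 7,
// the Start = GCD * Quotient + Remainder shape that delinearization uses.
#include <cassert>
#include <numeric>

static void demoConstantGCD() {
  const int Constant = 23, CGCD = 8;
  int Res = std::gcd(Constant, CGCD);        // 1: no useful common factor yet
  assert(Res == 1);
  const int Remainder = Constant % CGCD;     // srem(Constant, CGCD) == 7
  const int Stripped = Constant - Remainder; // 16
  Res = std::gcd(Stripped, CGCD);            // 8 divides what is left
  assert(Res == 8 && (Stripped / Res) * Res + Remainder == Constant);
}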
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
/// sizes of an array access. Returns the remainder of the delinearization that
/// is the offset start of the array. The SCEV->delinearize algorithm computes
/// the multiples of SCEV coefficients: that is, a pattern match of
/// subexpressions in the stride and base of a SCEV, corresponding to the
/// computation of a GCD (greatest common divisor) of base and stride. When
/// SCEV->delinearize fails, it returns the SCEV unchanged.
///
/// For example: when analyzing the memory access A[i][j][k] in this loop nest
///
/// void foo(long n, long m, long o, double A[n][m][o]) {
///
/// for (long i = 0; i < n; i++)
/// for (long j = 0; j < m; j++)
/// for (long k = 0; k < o; k++)
/// A[i][j][k] = 1.0;
/// }
///
/// the delinearization input is the following AddRec SCEV:
///
/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
///
/// From this SCEV, we are able to say that the base offset of the access is %A
/// because it appears as an offset that does not divide any of the strides in
/// the loops:
///
/// CHECK: Base offset: %A
///
/// and then SCEV->delinearize determines the size of some of the dimensions of
/// the array, as these are the multiples by which the strides occur:
///
/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
///
/// Note that the outermost dimension remains of UnknownSize because there are
/// no strides that would help identify the size of the last dimension: when
/// the array has been statically allocated, one could compute the size of that
/// dimension by dividing the overall size of the array by the size of the known
/// dimensions: %m * %o * 8.
///
/// Finally delinearize provides the access functions for the array reference
/// that corresponds to A[i][j][k] in the above C testcase:
///
/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
///
/// The testcases are checking the output of a function pass:
/// DelinearizationPass that walks through all loads and stores of a function
/// asking for the SCEV of the memory access with respect to all enclosing
/// loops, calling SCEV->delinearize on that and printing the results.
const SCEV *
SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes) const {
// Early exit in case this SCEV is not an affine multivariate function.
if (!this->isAffine())
return this;
const SCEV *Start = this->getStart();
const SCEV *Step = this->getStepRecurrence(SE);
// Build the SCEV representation of the canonical induction variable in the
// loop of this SCEV.
const SCEV *Zero = SE.getConstant(this->getType(), 0);
const SCEV *One = SE.getConstant(this->getType(), 1);
const SCEV *IV =
SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
DEBUG(dbgs() << "(delinearize: " << *this << "\n");
// Currently we fail to delinearize when the stride of this SCEV is 1. We
// could decide to not fail in this case: we could just return 1 for the size
// of the subscript, and this same SCEV for the access function.
if (Step == One) {
DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
return this;
}
// Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
const SCEV *Remainder = NULL;
const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
DEBUG(dbgs() << "GCD: " << *GCD << "\n");
DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
// Same remark as above: we currently fail the delinearization, although we
// can very well handle this special case.
if (GCD == One) {
DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
return this;
}
// As findGCD computed Remainder, GCD divides "Start - Remainder." The
// Quotient is then this SCEV without Remainder, scaled down by the GCD. The
// Quotient is what will be used in the next subscript delinearization.
const SCEV *Quotient =
SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
const SCEV *Rem;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
// Recursively call delinearize on the Quotient until there are no more
// multiples that can be recognized.
Rem = AR->delinearize(SE, Subscripts, Sizes);
else
Rem = Quotient;
// Scale up the canonical induction variable IV by whatever remains from the
// Step after division by the GCD: the GCD is the size of the sub-array.
if (Step != GCD) {
Step = SCEVDivision::divide(SE, Step, GCD);
IV = SE.getMulExpr(IV, Step);
}
// The access function in the current subscript is computed as the canonical
// induction variable IV (potentially scaled up by the step) and offset by
// Rem, the offset of delinearization in the sub-array.
const SCEV *Index = SE.getAddExpr(IV, Rem);
// Record the access function and the size of the current subscript.
Subscripts.push_back(Index);
Sizes.push_back(GCD);
#ifndef NDEBUG
int Size = Sizes.size();
DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
for (int i = 0; i < Size - 1; i++)
DEBUG(dbgs() << "[" << *Sizes[i] << "]");
DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
DEBUG(dbgs() << "ArrayRef");
for (int i = 0; i < Size; i++)
DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
DEBUG(dbgs() << "\n)\n");
#endif
return Remainder;
}
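// Illustrative sketch (not part of this revision): the inverse view of the
// delinearization above, recovering the subscripts of A[i][j][k] from a flat
// element index once the inner sizes m and o are known. The concrete sizes
// below are hypothetical.
#include <cassert>
#include <cstdint>

static void demoDelinearize() {
  const uint64_t m = 5, o = 7;               // inner dimension sizes
  const uint64_t i = 2, j = 3, k = 4;
  const uint64_t Flat = (i * m + j) * o + k; // flattened subscript: 95
  assert(Flat % o == k);                     // innermost subscript
  assert((Flat / o) % m == j);               // middle subscript
  assert(Flat / o / m == i);                 // outermost subscript
}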
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
void ScalarEvolution::SCEVCallbackVH::deleted() {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->ValueExprMap.erase(getValPtr());
// this now dangles!
}
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
Value *Old = getValPtr();
SmallVector<User *, 16> Worklist;
SmallPtrSet<User *, 8> Visited;
for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
UI != UE; ++UI)
Worklist.push_back(*UI);
while (!Worklist.empty()) {
User *U = Worklist.pop_back_val();
// Deleting the Old value will cause this to dangle. Postpone
// that until everything else is done.
if (U == Old)
continue;
if (!Visited.insert(U))
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->ValueExprMap.erase(U);
for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
UI != UE; ++UI)
Worklist.push_back(*UI);
}
// Delete the Old value.
if (PHINode *PN = dyn_cast<PHINode>(Old))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->ValueExprMap.erase(Old);
// this now dangles!
}
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
: CallbackVH(V), SE(se) {}
//===----------------------------------------------------------------------===//
// ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
: FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
return false;
}
void ScalarEvolution::releaseMemory() {
// Iterate through all the SCEVUnknown instances and call their
// destructors, so that they release their references to their values.
for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
U->~SCEVUnknown();
FirstUnknown = 0;
ValueExprMap.clear();
// Free any extra memory created for ExitNotTakenInfo in the unlikely event
// that a loop had multiple computable exits.
for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
I != E; ++I) {
I->second.clear();
}
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
ValuesAtScopes.clear();
LoopDispositions.clear();
BlockDispositions.clear();
UnsignedRanges.clear();
SignedRanges.clear();
UniqueSCEVs.clear();
SCEVAllocator.Reset();
}
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTree>();
AU.addRequired<TargetLibraryInfo>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
const Loop *L) {
// Print all inner loops first
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
PrintLoopInfo(OS, SE, *I);
OS << "Loop ";
WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
OS << ": ";
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
if (ExitBlocks.size() != 1)
OS << "<multiple exits> ";
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
} else {
OS << "Unpredictable backedge-taken count. ";
}
OS << "\n"
"Loop ";
WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
OS << ": ";
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
} else {
OS << "Unpredictable max backedge-taken count. ";
}
OS << "\n";
}
void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
// ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
// observable from outside the class though, so casting away the
// const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
OS << "Classifying expressions for: ";
WriteAsOperand(OS, F, /*PrintType=*/false);
OS << "\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
OS << *I << '\n';
OS << " --> ";
const SCEV *SV = SE.getSCEV(&*I);
SV->print(OS);
const Loop *L = LI->getLoopFor((*I).getParent());
const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
}
if (L) {
OS << "\t\t" "Exits: ";
const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!SE.isLoopInvariant(ExitValue, L)) {
OS << "<<Unknown>>";
} else {
OS << *ExitValue;
}
}
OS << "\n";
}
OS << "Determining loop execution counts for: ";
WriteAsOperand(OS, F, /*PrintType=*/false);
OS << "\n";
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
PrintLoopInfo(OS, &SE, *I);
}
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
for (unsigned u = 0; u < Values.size(); u++) {
if (Values[u].first == L)
return Values[u].second;
}
Values.push_back(std::make_pair(L, LoopVariant));
LoopDisposition D = computeLoopDisposition(S, L);
SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
for (unsigned u = Values2.size(); u > 0; u--) {
if (Values2[u - 1].first == L) {
Values2[u - 1].second = D;
break;
}
}
return D;
}
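// Illustrative sketch (not part of this revision): the two-phase memoization
// used by getLoopDisposition, with std::vector standing in for the DenseMap
// payload. A provisional entry is pushed before the recursive computation to
// break cycles; since the recursion may have appended entries, the slot is
// re-found (scanning from the back, where it was pushed) before the real
// value is written.
#include <utility>
#include <vector>

template <typename K, typename V>
static V demoMemoize(std::vector<std::pair<K, V>> &Cache, K Key,
                     V Provisional, V (*Compute)(K)) {
  for (const auto &E : Cache)
    if (E.first == Key)
      return E.second;                 // cache hit
  Cache.push_back({Key, Provisional}); // placeholder guards the recursion
  V Result = Compute(Key);             // may push more entries into Cache
  for (auto It = Cache.rbegin(); It != Cache.rend(); ++It)
    if (It->first == Key) {
      It->second = Result;             // overwrite the placeholder
      break;
    }
  return Result;
}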
ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
switch (S->getSCEVType()) {
case scConstant:
return LoopInvariant;
case scTruncate:
case scZeroExtend:
case scSignExtend:
return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
// If L is the addrec's loop, it's computable.
if (AR->getLoop() == L)
return LoopComputable;
// Add recurrences are never invariant in the function-body (null loop).
if (!L)
return LoopVariant;
// This recurrence is variant w.r.t. L if L contains AR's loop.
if (L->contains(AR->getLoop()))
return LoopVariant;
// This recurrence is invariant w.r.t. L if AR's loop contains L.
if (AR->getLoop()->contains(L))
return LoopInvariant;
// This recurrence is variant w.r.t. L if any of its operands
// are variant.
for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I)
if (!isLoopInvariant(*I, L))
return LoopVariant;
// Otherwise it's loop-invariant.
return LoopInvariant;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool HasVarying = false;
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
LoopDisposition D = getLoopDisposition(*I, L);
if (D == LoopVariant)
return LoopVariant;
if (D == LoopComputable)
HasVarying = true;
}
return HasVarying ? LoopComputable : LoopInvariant;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
if (LD == LoopVariant)
return LoopVariant;
LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
if (RD == LoopVariant)
return LoopVariant;
return (LD == LoopInvariant && RD == LoopInvariant) ?
LoopInvariant : LoopComputable;
}
case scUnknown:
// All non-instruction values are loop invariant. All instructions are loop
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
return LoopInvariant;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
default: llvm_unreachable("Unknown SCEV kind!");
}
}
bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
return getLoopDisposition(S, L) == LoopInvariant;
}
bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
return getLoopDisposition(S, L) == LoopComputable;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
for (unsigned u = 0; u < Values.size(); u++) {
if (Values[u].first == BB)
return Values[u].second;
}
Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
BlockDisposition D = computeBlockDisposition(S, BB);
SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
for (unsigned u = Values2.size(); u > 0; u--) {
if (Values2[u - 1].first == BB) {
Values2[u - 1].second = D;
break;
}
}
return D;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
switch (S->getSCEVType()) {
case scConstant:
return ProperlyDominatesBlock;
case scTruncate:
case scZeroExtend:
case scSignExtend:
return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
case scAddRecExpr: {
// This uses a "dominates" query instead of "properly dominates" query
// to test for proper dominance too, because the instruction which
// produces the addrec's value is a PHI, and a PHI effectively properly
// dominates its entire containing block.
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
if (!DT->dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
}
// FALL THROUGH into SCEVNAryExpr handling.
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
BlockDisposition D = getBlockDisposition(*I, BB);
if (D == DoesNotDominateBlock)
return DoesNotDominateBlock;
if (D == DominatesBlock)
Proper = false;
}
return Proper ? ProperlyDominatesBlock : DominatesBlock;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
BlockDisposition LD = getBlockDisposition(LHS, BB);
if (LD == DoesNotDominateBlock)
return DoesNotDominateBlock;
BlockDisposition RD = getBlockDisposition(RHS, BB);
if (RD == DoesNotDominateBlock)
return DoesNotDominateBlock;
return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
ProperlyDominatesBlock : DominatesBlock;
}
case scUnknown:
if (Instruction *I =
dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
if (I->getParent() == BB)
return DominatesBlock;
if (DT->properlyDominates(I->getParent(), BB))
return ProperlyDominatesBlock;
return DoesNotDominateBlock;
}
return ProperlyDominatesBlock;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
default:
llvm_unreachable("Unknown SCEV kind!");
}
}
bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) >= DominatesBlock;
}
bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}
namespace {
// Search for a SCEV expression node within an expression tree.
// Implements SCEVTraversal::Visitor.
struct SCEVSearch {
const SCEV *Node;
bool IsFound;
SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
bool follow(const SCEV *S) {
IsFound |= (S == Node);
return !IsFound;
}
bool isDone() const { return IsFound; }
};
}
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
SCEVSearch Search(Op);
visitAll(S, Search);
return Search.IsFound;
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
ValuesAtScopes.erase(S);
LoopDispositions.erase(S);
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
BackedgeTakenInfo &BEInfo = I->second;
if (BEInfo.hasOperand(S, this)) {
BEInfo.clear();
BackedgeTakenCounts.erase(I++);
}
else
++I;
}
}
typedef DenseMap<const Loop *, std::string> VerifyMap;
/// replaceSubString - Replaces all occurrences of From in Str with To.
static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
size_t Pos = 0;
while ((Pos = Str.find(From, Pos)) != std::string::npos) {
Str.replace(Pos, From.size(), To.data(), To.size());
Pos += To.size();
}
}
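// Illustrative sketch (not part of this revision): replaceSubString as used
// a few lines below to normalize stringified SCEVs, stripping wrap flags
// before the old and new dumps are compared.
#include <cassert>
#include <string>

static void demoNormalize() {
  std::string S = "{0,+,1}<nuw><nsw><%loop>";
  replaceSubString(S, "<nuw>", ""); // uses the helper defined above
  replaceSubString(S, "<nsw>", "");
  assert(S == "{0,+,1}<%loop>");
}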
/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
static void
getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
std::string &S = Map[L];
if (S.empty()) {
raw_string_ostream OS(S);
SE.getBackedgeTakenCount(L)->print(OS);
// false and 0 are semantically equivalent. This can happen in dead loops.
replaceSubString(OS.str(), "false", "0");
// Remove wrap flags, their use in SCEV is highly fragile.
// FIXME: Remove this when SCEV gets smarter about them.
replaceSubString(OS.str(), "<nw>", "");
replaceSubString(OS.str(), "<nsw>", "");
replaceSubString(OS.str(), "<nuw>", "");
}
}
}
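// Example of the normalization above (hypothetical SCEV strings): a count
// printed as "{0,+,1}<nuw><nsw>" is recorded as "{0,+,1}", and a dead loop
// whose count prints as "false" is recorded as "0", so both sides of the
// verifier comparison below use the same canonical spelling.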
void ScalarEvolution::verifyAnalysis() const {
if (!VerifySCEV)
return;
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
// Gather stringified backedge taken counts for all loops using SCEV's caches.
// FIXME: It would be much better to store actual values instead of strings,
// but SCEV pointers will change if we drop the caches.
VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
// Gather stringified backedge taken counts for all loops without using
// SCEV's caches.
SE.releaseMemory();
for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
// Now compare whether they're the same with and without caches. This allows
// verifying that no pass changed the cache.
assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
"New loops suddenly appeared!");
for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
OldE = BackedgeDumpsOld.end(),
NewI = BackedgeDumpsNew.begin();
OldI != OldE; ++OldI, ++NewI) {
assert(OldI->first == NewI->first && "Loop order changed!");
// Compare the stringified SCEVs. We don't care if an undef backedge-taken
// count changes.
// FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
// means that a pass is buggy or SCEV has to learn a new pattern, but it
// is usually not harmful.
if (OldI->second != NewI->second &&
OldI->second.find("undef") == std::string::npos &&
NewI->second.find("undef") == std::string::npos &&
OldI->second != "***COULDNOTCOMPUTE***" &&
NewI->second != "***COULDNOTCOMPUTE***") {
dbgs() << "SCEVValidator: SCEV for loop '"
<< OldI->first->getHeader()->getName()
<< "' changed from '" << OldI->second
<< "' to '" << NewI->second << "'!\n";
std::abort();
}
}
// TODO: Verify more things.
}
Index: head/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (revision 265924)
+++ head/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (revision 265925)
@@ -1,2263 +1,2264 @@
//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AsmPrinter class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
static const char *const EHTimerName = "DWARF Exception Writer";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
char AsmPrinter::ID = 0;
typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
static gcp_map_type &getGCMap(void *&P) {
if (P == 0)
P = new gcp_map_type();
return *(gcp_map_type*)P;
}
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
unsigned InBits = 0) {
unsigned NumBits = 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
NumBits = TD.getPreferredAlignmentLog(GVar);
// If InBits is specified, round up to it.
if (InBits > NumBits)
NumBits = InBits;
// If the GV has a specified alignment, take it into account.
if (GV->getAlignment() == 0)
return NumBits;
unsigned GVAlign = Log2_32(GV->getAlignment());
// If the GVAlign is larger than NumBits, or if we are required to obey
// NumBits because the GV has an assigned section, obey it.
if (GVAlign > NumBits || GV->hasSection())
NumBits = GVAlign;
return NumBits;
}
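// Worked example (hypothetical global): a preferred alignment of log2 == 2
// (4 bytes) with InBits == 3 gives NumBits == 3; an explicit "align 16" on
// the GV then yields Log2_32(16) == 4 > 3, so the function returns 4,
// i.e. 16-byte alignment.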
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
: MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0;
CurrentFnSym = CurrentFnSymForSize = 0;
GCMetadataPrinters = 0;
VerboseAsm = Streamer.isVerboseAsm();
}
AsmPrinter::~AsmPrinter() {
assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
if (GCMetadataPrinters != 0) {
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
delete I->second;
delete &GCMap;
GCMetadataPrinters = 0;
}
delete &OutStreamer;
}
/// getFunctionNumber - Return a unique ID for the current function.
///
unsigned AsmPrinter::getFunctionNumber() const {
return MF->getFunctionNumber();
}
const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return TM.getTargetLowering()->getObjFileLowering();
}
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
return *TM.getDataLayout();
}
StringRef AsmPrinter::getTargetTriple() const {
return TM.getTargetTriple();
}
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
return OutStreamer.getCurrentSection().first;
}
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfo>();
AU.addRequired<GCModuleInfo>();
if (isVerbose())
AU.addRequired<MachineLoopInfo>();
}
bool AsmPrinter::doInitialization(Module &M) {
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MMI->AnalyzeModule(M);
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
OutStreamer.InitStreamer();
Mang = new Mangler(&TM);
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
// Very minimal debug info. It is ignored if we emit actual debug info. If we
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
OutStreamer.EmitFileDirective(M.getModuleIdentifier());
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
MP->beginAssembly(*this);
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
OutStreamer.AddComment("Start of file scope inline assembly");
OutStreamer.AddBlankLine();
EmitInlineAsm(M.getModuleInlineAsm()+"\n");
OutStreamer.AddComment("End of file scope inline assembly");
OutStreamer.AddBlankLine();
}
if (MAI->doesSupportDebugInformation())
DD = new DwarfDebug(this, &M);
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
return false;
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
DE = new DwarfCFIException(this);
return false;
case ExceptionHandling::ARM:
DE = new ARMException(this);
return false;
case ExceptionHandling::Win64:
DE = new Win64Exception(this);
return false;
}
llvm_unreachable("Unknown exception type.");
}
void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
GlobalValue::LinkageTypes Linkage = GV->getLinkage();
switch (Linkage) {
case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- if (MAI->getWeakDefDirective() != 0) {
+ if (MAI->hasWeakDefDirective()) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
bool CanBeHidden = false;
- if (Linkage == GlobalValue::LinkOnceODRLinkage) {
+ if (Linkage == GlobalValue::LinkOnceODRLinkage &&
+ MAI->hasWeakDefCanBeHiddenDirective()) {
if (GV->hasUnnamedAddr()) {
CanBeHidden = true;
} else {
GlobalStatus GS;
if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
CanBeHidden = true;
}
}
if (!CanBeHidden)
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
- } else if (MAI->getLinkOnceDirective() != 0) {
+ } else if (MAI->hasLinkOnceDirective()) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
// NOTE: linkonce is handled by the section the symbol was assigned to.
} else {
// .weak _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
}
return;
case GlobalValue::DLLExportLinkage:
case GlobalValue::AppendingLinkage:
// FIXME: appending linkage variables should go into a section of
// their name or something. For now, just emit them as external.
case GlobalValue::ExternalLinkage:
// If external or appending, declare as a global symbol.
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
return;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::LinkerPrivateLinkage:
return;
case GlobalValue::AvailableExternallyLinkage:
llvm_unreachable("Should never emit this");
case GlobalValue::DLLImportLinkage:
case GlobalValue::ExternalWeakLinkage:
llvm_unreachable("Don't know how to emit these");
}
llvm_unreachable("Unknown linkage type!");
}
MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
return getObjFileLowering().getSymbol(*Mang, GV);
}
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->hasInitializer()) {
// Check to see if this is a special global used by LLVM, if so, emit it.
if (EmitSpecialLLVMGlobal(GV))
return;
if (isVerbose()) {
WriteAsOperand(OutStreamer.GetCommentOS(), GV,
/*PrintType=*/false, GV->getParent());
OutStreamer.GetCommentOS() << '\n';
}
}
MCSymbol *GVSym = getSymbol(GV);
EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
if (!GV->hasInitializer()) // External globals require no extra code.
return;
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
const DataLayout *DL = TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
if (DD)
DD->setSymbolSize(GVSym, Size);
// Handle common and BSS local symbols (.lcomm).
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
unsigned Align = 1 << AlignLog;
// Handle common symbols.
if (GVKind.isCommon()) {
if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
Align = 0;
// .comm _foo, 42, 4
OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
return;
}
// Handle local BSS symbols.
if (MAI->hasMachoZeroFillDirective()) {
const MCSection *TheSection =
getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
// .zerofill __DATA, __bss, _foo, 400, 5
OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
return;
}
// Use .lcomm only if it supports user-specified alignment.
// Otherwise, while it would still be correct to use .lcomm in some
// cases (e.g. when Align == 1), the external assembler might enforce
// some unknown default alignment behavior, which could cause spurious
// differences between the external and integrated assemblers.
// Prefer to simply fall back to .local / .comm in this case.
if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
return;
}
if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
Align = 0;
// .local _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
return;
}
const MCSection *TheSection =
getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
// Handle the zerofill directive on darwin, which is a special form of BSS
// emission.
if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
// .zerofill __DATA, __common, _foo, 400, 5
OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
return;
}
// Handle thread local data for mach-o which requires us to output an
// additional structure of data and mangle the original symbol so that we
// can reference it later.
//
// TODO: This should become an "emit thread local global" method on TLOF.
// All of this macho specific stuff should be sunk down into TLOFMachO and
// stuff like "TLSExtraDataSection" should no longer be part of the parent
// TLOF class. This will also make it more obvious that stuff like
// MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
// specific code.
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
} else if (GVKind.isThreadData()) {
OutStreamer.SwitchSection(TheSection);
EmitAlignment(AlignLog, GV);
OutStreamer.EmitLabel(MangSym);
EmitGlobalConstant(GV->getInitializer());
}
OutStreamer.AddBlankLine();
// Emit the variable struct for the runtime.
const MCSection *TLVSect
= getObjFileLowering().getTLSExtraDataSection();
OutStreamer.SwitchSection(TLVSect);
// Emit the linkage here.
EmitLinkage(GV, GVSym);
OutStreamer.EmitLabel(GVSym);
// Three pointers in size:
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
unsigned PtrSize = DL->getPointerTypeSize(GV->getType());
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize);
OutStreamer.EmitIntValue(0, PtrSize);
OutStreamer.EmitSymbolValue(MangSym, PtrSize);
OutStreamer.AddBlankLine();
return;
}
OutStreamer.SwitchSection(TheSection);
EmitLinkage(GV, GVSym);
EmitAlignment(AlignLog, GV);
OutStreamer.EmitLabel(GVSym);
EmitGlobalConstant(GV->getInitializer());
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
OutStreamer.AddBlankLine();
}
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::EmitFunctionHeader() {
// Print out constants referenced by the function
EmitConstantPool();
// Print the 'header' of function.
const Function *F = MF->getFunction();
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
EmitLinkage(F, CurrentFnSym);
EmitAlignment(MF->getAlignment(), F);
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
if (isVerbose()) {
WriteAsOperand(OutStreamer.GetCommentOS(), F,
/*PrintType=*/false, F->getParent());
OutStreamer.GetCommentOS() << '\n';
}
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
EmitFunctionEntryLabel();
// If the function had address-taken blocks that got deleted, then we have
// references to the dangling symbols. Emit them at the start of the function
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
OutStreamer.AddComment("Address taken block that was later removed");
OutStreamer.EmitLabel(DeadBlockSyms[i]);
}
// Emit pre-function debug and/or EH information.
if (DE) {
NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
DE->BeginFunction(MF);
}
if (DD) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->beginFunction(MF);
}
// Emit the prefix data.
if (F->hasPrefixData())
EmitGlobalConstant(F->getPrefixData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
/// function. This can be overridden by targets as required to do custom stuff.
void AsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
if (CurrentFnSym->isUndefined())
return OutStreamer.EmitLabel(CurrentFnSym);
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
}
/// emitComments - Pretty-print comments for instructions.
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetMachine &TM = MF->getTarget();
// Check for spills and reloads
int FI;
const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
} else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
} else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
} else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
// Check for spill-induced copies
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
CommentOS << " Reload Reuse\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
OutStreamer.AddComment(Twine("implicit-def: ") +
TM.getRegisterInfo()->getName(RegNo));
OutStreamer.AddBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
std::string Str = "kill:";
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
Str += ' ';
Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
Str += (Op.isDef() ? "<def>" : "<kill>");
}
AP.OutStreamer.AddComment(Str);
AP.OutStreamer.AddBlankLine();
}
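// Sketch of the comment produced above (register names are illustrative):
//   kill: EAX<def> ECX<kill>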
/// emitDebugValueComment - This method handles the target-independent form
/// of DBG_VALUE, returning true if it was able to do so. A false return
/// means the target will need to handle MI in EmitInstruction.
static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// This code handles only the 3-operand target-independent form.
if (MI->getNumOperands() != 3)
return false;
SmallString<128> Str;
raw_svector_ostream OS(Str);
OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
// Cast away const; the DI wrapper classes do not take const operands for
// some reason.
DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
if (V.getContext().isSubprogram()) {
StringRef Name = DISubprogram(V.getContext()).getDisplayName();
if (!Name.empty())
OS << Name << ":";
}
OS << V.getName() << " <- ";
// The second operand is only an offset if it's an immediate.
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
// Register or immediate value. Register 0 means undef.
if (MI->getOperand(0).isFPImm()) {
APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
OS << (double)APF.convertToFloat();
} else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
OS << APF.convertToDouble();
} else {
// There is no good way to print long double. Convert a copy to
// double. Ah well, it's only a comment.
bool ignored;
APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&ignored);
OS << "(long double) " << APF.convertToDouble();
}
} else if (MI->getOperand(0).isImm()) {
OS << MI->getOperand(0).getImm();
} else if (MI->getOperand(0).isCImm()) {
MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
} else {
unsigned Reg;
if (MI->getOperand(0).isReg()) {
Reg = MI->getOperand(0).getReg();
} else {
assert(MI->getOperand(0).isFI() && "Unknown operand type");
const TargetFrameLowering *TFI = AP.TM.getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
Deref = true;
}
if (Reg == 0) {
// Suppress offset, it is not meaningful here.
OS << "undef";
// NOTE: Want this comment at start of line, don't emit with AddComment.
AP.OutStreamer.EmitRawText(OS.str());
return true;
}
if (Deref)
OS << '[';
OS << AP.TM.getRegisterInfo()->getName(Reg);
}
if (Deref)
OS << '+' << Offset << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
AP.OutStreamer.EmitRawText(OS.str());
return true;
}
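// Sketch of the raw-text comment this emits (the comment character, the
// "foo:x" name, and the register/offset are illustrative and depend on the
// target and the DBG_VALUE operands):
//   ;DEBUG_VALUE: foo:x <- [R7+8]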
AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
MF->getFunction()->needsUnwindTableEntry())
return CFI_M_EH;
if (MMI->hasDebugInfo())
return CFI_M_Debug;
return CFI_M_None;
}
bool AsmPrinter::needsSEHMoves() {
return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
MF->getFunction()->needsUnwindTableEntry();
}
bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
return MAI->doesDwarfUseRelocationsAcrossSections();
}
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
const MCSymbol *Label = MI.getOperand(0).getMCSymbol();
if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
return;
if (needsCFIMoves() == CFI_M_None)
return;
if (MMI->getCompactUnwindEncoding() != 0)
OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
const MachineModuleInfo &MMI = MF->getMMI();
const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
bool FoundOne = false;
(void)FoundOne;
for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(),
E = Instrs.end(); I != E; ++I) {
if (I->getLabel() == Label) {
emitCFIInstruction(*I);
FoundOne = true;
}
}
assert(FoundOne);
}
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk before the function body.
EmitFunctionBodyStart();
bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
// Print out code for the function.
bool HasAnyRealCode = false;
const MachineInstr *LastMI = 0;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
// Print a label for the basic block.
EmitBasicBlockStart(I);
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
LastMI = II;
// Print the assembly for the instruction.
if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
!II->isDebugValue()) {
HasAnyRealCode = true;
++EmittedInsts;
}
if (ShouldPrintDebugScopes) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->beginInstruction(II);
}
if (isVerbose())
emitComments(*II, OutStreamer.GetCommentOS());
switch (II->getOpcode()) {
case TargetOpcode::PROLOG_LABEL:
emitPrologLabel(*II);
break;
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
break;
case TargetOpcode::INLINEASM:
EmitInlineAsm(II);
break;
case TargetOpcode::DBG_VALUE:
if (isVerbose()) {
if (!emitDebugValueComment(II, *this))
EmitInstruction(II);
}
break;
case TargetOpcode::IMPLICIT_DEF:
if (isVerbose()) emitImplicitDef(II);
break;
case TargetOpcode::KILL:
if (isVerbose()) emitKill(II, *this);
break;
default:
if (!TM.hasMCUseLoc())
MCLineEntry::Make(&OutStreamer, getCurrentSection());
EmitInstruction(II);
break;
}
if (ShouldPrintDebugScopes) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->endInstruction(II);
}
}
}
// If the last instruction was a prolog label, then we have a situation where
// we emitted a prolog but no function body. This results in the ending prolog
// label equaling the end of function label and an invalid "row" in the
// FDE. We need to emit a noop in this situation so that the FDE's rows are
// valid.
bool RequiresNoop = LastMI && LastMI->isPrologLabel();
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
MCInst Noop;
TM.getInstrInfo()->getNoopForMachoTarget(Noop);
if (Noop.getOpcode()) {
OutStreamer.AddComment("avoids zero-length function");
OutStreamer.EmitInstruction(Noop);
} else // Target not mc-ized yet.
OutStreamer.EmitRawText(StringRef("\tnop\n"));
}
const Function *F = MF->getFunction();
for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
const BasicBlock *BB = i;
if (!BB->hasAddressTaken())
continue;
MCSymbol *Sym = GetBlockAddressSymbol(BB);
if (Sym->isDefined())
continue;
OutStreamer.AddComment("Address of block that was removed by CodeGen");
OutStreamer.EmitLabel(Sym);
}
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
// If the target wants a .size directive for the size of the function, emit
// it.
if (MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function, so we can get the size as
// difference between the function label and the temp label.
MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(FnEndLabel);
const MCExpr *SizeExp =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
MCSymbolRefExpr::Create(CurrentFnSymForSize,
OutContext),
OutContext);
OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
}
// Emit post-function debug information.
if (DD) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->endFunction(MF);
}
if (DE) {
NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
DE->EndFunction();
}
MMI->EndFunction();
// Print out jump tables referenced by the function.
EmitJumpTableInfo();
OutStreamer.AddBlankLine();
}
/// EmitDwarfRegOp - Emit dwarf register operation.
void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
bool Indirect) const {
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
++SR) {
Reg = TRI->getDwarfRegNum(*SR, false);
// FIXME: Get the bit range this register occupies within the super-register
// so that we can produce a DW_OP_bit_piece.
}
// FIXME: Handle cases like a super register being encoded as
// DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
// FIXME: We have no reasonable way of handling errors in here. The
// caller might be in the middle of a DWARF expression. We should
// probably assert that Reg >= 0 once debug info generation is more mature.
if (MLoc.isIndirect() || Indirect) {
if (Reg < 32) {
OutStreamer.AddComment(
dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
EmitInt8(dwarf::DW_OP_breg0 + Reg);
} else {
OutStreamer.AddComment("DW_OP_bregx");
EmitInt8(dwarf::DW_OP_bregx);
OutStreamer.AddComment(Twine(Reg));
EmitULEB128(Reg);
}
EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset());
if (MLoc.isIndirect() && Indirect)
EmitInt8(dwarf::DW_OP_deref);
} else {
if (Reg < 32) {
OutStreamer.AddComment(
dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
EmitInt8(dwarf::DW_OP_reg0 + Reg);
} else {
OutStreamer.AddComment("DW_OP_regx");
EmitInt8(dwarf::DW_OP_regx);
OutStreamer.AddComment(Twine(Reg));
EmitULEB128(Reg);
}
}
// FIXME: Produce a DW_OP_bit_piece if we used a superregister
}
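// Worked example (hypothetical location): an indirect MLoc with DWARF
// register 5 and offset 8 emits DW_OP_breg5 followed by SLEB128(8); the
// same register as a plain (non-indirect) location emits just DW_OP_reg5.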
bool AsmPrinter::doFinalization(Module &M) {
// Emit global variables.
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
EmitGlobalVariable(I);
// Emit visibility info for declarations
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
const Function &F = *I;
if (!F.isDeclaration())
continue;
GlobalValue::VisibilityTypes V = F.getVisibility();
if (V == GlobalValue::DefaultVisibility)
continue;
MCSymbol *Name = getSymbol(&F);
EmitVisibility(Name, V, false);
}
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
if (!ModuleFlags.empty())
getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
// Make sure we wrote out everything we need.
OutStreamer.Flush();
// Finalize debug and EH information.
if (DE) {
{
NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
DE->EndModule();
}
delete DE; DE = 0;
}
if (DD) {
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->endModule();
}
delete DD; DD = 0;
}
// If the target wants to know about weak references, print them all.
if (MAI->getWeakRefDirective()) {
// FIXME: This is not lazy; it would be nice to only print weak references
// to stuff that is actually used. Note that doing so would require targets
// to notice uses in operands (due to constant exprs etc). This should
// happen with the MC stuff eventually.
// Print out module-level global variables here.
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (!I->hasExternalWeakLinkage()) continue;
OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
}
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->hasExternalWeakLinkage()) continue;
OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
}
}
if (MAI->hasSetDirective()) {
OutStreamer.AddBlankLine();
for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
MCSymbol *Name = getSymbol(I);
const GlobalValue *GV = I->getAliasedGlobal();
if (GV->isDeclaration()) {
report_fatal_error(Name->getName() +
": Target doesn't support aliases to declarations");
}
MCSymbol *Target = getSymbol(GV);
if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
else
assert(I->hasLocalLinkage() && "Invalid alias linkage");
EmitVisibility(Name, I->getVisibility());
// Emit the directives as assignments aka .set:
OutStreamer.EmitAssignment(Name,
MCSymbolRefExpr::Create(Target, OutContext));
}
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
MP->finishAssembly(*this);
// Emit llvm.ident metadata in an '.ident' directive.
EmitModuleIdents(M);
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
OutStreamer.SwitchSection(S);
// Allow the target to emit any magic that it wants at the end of the file,
// after everything else has gone out.
EmitEndOfAsmFile(M);
delete Mang; Mang = 0;
MMI = 0;
OutStreamer.Finish();
OutStreamer.reset();
return false;
}
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = getSymbol(MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
}
namespace {
// SectionCPs - Keep track of the alignment and constant-pool entries per section.
struct SectionCPs {
const MCSection *S;
unsigned Alignment;
SmallVector<unsigned, 4> CPEs;
SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
};
}
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
/// used to print out constants which have been "spilled to memory" by
/// the code generator.
///
void AsmPrinter::EmitConstantPool() {
const MachineConstantPool *MCP = MF->getConstantPool();
const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
if (CP.empty()) return;
// Calculate sections for constant pool entries. We collect entries that go
// into the same section together to reduce the number of section switches.
SmallVector<SectionCPs, 4> CPSections;
for (unsigned i = 0, e = CP.size(); i != e; ++i) {
const MachineConstantPoolEntry &CPE = CP[i];
unsigned Align = CPE.getAlignment();
SectionKind Kind;
switch (CPE.getRelocationInfo()) {
default: llvm_unreachable("Unknown section kind");
case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
case 1:
Kind = SectionKind::getReadOnlyWithRelLocal();
break;
case 0:
switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
case 4: Kind = SectionKind::getMergeableConst4(); break;
case 8: Kind = SectionKind::getMergeableConst8(); break;
case 16: Kind = SectionKind::getMergeableConst16();break;
default: Kind = SectionKind::getMergeableConst(); break;
}
}
const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
// The number of sections is small; just do a linear search from the
// last section to the first.
bool Found = false;
unsigned SecIdx = CPSections.size();
while (SecIdx != 0) {
if (CPSections[--SecIdx].S == S) {
Found = true;
break;
}
}
if (!Found) {
SecIdx = CPSections.size();
CPSections.push_back(SectionCPs(S, Align));
}
if (Align > CPSections[SecIdx].Alignment)
CPSections[SecIdx].Alignment = Align;
CPSections[SecIdx].CPEs.push_back(i);
}
// Now print stuff into the calculated sections.
for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
OutStreamer.SwitchSection(CPSections[i].S);
EmitAlignment(Log2_32(CPSections[i].Alignment));
unsigned Offset = 0;
for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
unsigned CPI = CPSections[i].CPEs[j];
MachineConstantPoolEntry CPE = CP[CPI];
// Emit inter-object padding for alignment.
unsigned AlignMask = CPE.getAlignment() - 1;
unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
OutStreamer.EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
OutStreamer.EmitLabel(GetCPISymbol(CPI));
if (CPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
EmitGlobalConstant(CPE.Val.ConstVal);
}
}
}
/// EmitJumpTableInfo - Print assembly representations of the jump tables used
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
// Pick the directive to use to print the jump table entries, and switch to
// the appropriate section.
const Function *F = MF->getFunction();
bool JTInDiffSection = false;
if (// In PIC mode, we need to emit the jump table to the same section as the
// function body itself, otherwise the label differences won't make sense.
// FIXME: Need a better predicate for this: what about custom entries?
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
// We should also do this if the section name is NULL or the function is
// declared in a discardable section
// FIXME: this isn't the right predicate, should be based on the MCSection
// for the function.
F->isWeakForLinker()) {
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
} else {
// Otherwise, drop it in the readonly section.
const MCSection *ReadOnlySection =
getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
OutStreamer.SwitchSection(ReadOnlySection);
JTInDiffSection = true;
}
EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
if (!JTInDiffSection)
OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
// If this jump table was deleted, ignore it.
if (JTBBs.empty()) continue;
// For the EK_LabelDifference32 entry, if the target supports .set, emit a
// .set directive for each unique entry. This reduces the number of
// relocations the assembler will generate for the jump table.
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
MAI->hasSetDirective()) {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
const TargetLowering *TLI = TM.getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
const MachineBasicBlock *MBB = JTBBs[ii];
if (!EmittedSets.insert(MBB)) continue;
// .set LJTSet, LBB32-base
const MCExpr *LHS =
MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
MCBinaryExpr::CreateSub(LHS, Base, OutContext));
}
}
// On some targets (e.g. Darwin) we want to emit two consecutive labels
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
// FIXME: This doesn't have to have any specific name, just any randomly
// named and numbered 'l' label would work. Simplify GetJTISymbol.
OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
OutStreamer.EmitLabel(GetJTISymbol(JTI));
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
}
if (!JTInDiffSection)
OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
/// current stream.
void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned UID) const {
assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
const MCExpr *Value = 0;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
OutContext);
break;
case MachineJumpTableInfo::EK_BlockAddress:
// EK_BlockAddress - Each entry is a plain address of block, e.g.:
// .word LBB123
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
break;
case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
// EK_GPRel32BlockAddress - Each entry is an address of block, encoded
// with a relocation as gp-relative, e.g.:
// .gprel32 LBB123
MCSymbol *MBBSym = MBB->getSymbol();
OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
return;
}
case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
// EK_GPRel64BlockAddress - Each entry is an address of block, encoded
// with a relocation as gp-relative, e.g.:
// .gpdword LBB123
MCSymbol *MBBSym = MBB->getSymbol();
OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
return;
}
case MachineJumpTableInfo::EK_LabelDifference32: {
// EK_LabelDifference32 - Each entry is the address of the block minus
// the address of the jump table. This is used for PIC jump tables where
// gprel32 is not supported. e.g.:
// .word LBB123 - LJTI1_2
// If the .set directive is supported, this is emitted as:
// .set L4_5_set_123, LBB123 - LJTI1_2
// .word L4_5_set_123
// If we have emitted set directives for the jump table entries, print
// them rather than the entries themselves. If we're emitting PIC, then
// emit the table entries as differences between two text section labels.
if (MAI->hasSetDirective()) {
// If we used .set, reference the .set's symbol.
Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
OutContext);
break;
}
// Otherwise, use the difference as the jump table entry.
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
break;
}
}
assert(Value && "Unknown entry kind!");
unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
OutStreamer.EmitValue(Value, EntrySize);
}
/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
/// special global used by LLVM. If so, emit it and return true, otherwise
/// do nothing and return false.
bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.used") {
if (MAI->hasNoDeadStrip()) // No need to emit this at all.
EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
return true;
}
// Ignore debug and non-emitted data. This handles llvm.compiler.used.
if (GV->getSection() == "llvm.metadata" ||
GV->hasAvailableExternallyLinkage())
return true;
if (!GV->hasAppendingLinkage()) return false;
assert(GV->hasInitializer() && "Not a special LLVM global!");
if (GV->getName() == "llvm.global_ctors") {
EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
StringRef Sym(".constructors_used");
OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
MCSA_Reference);
}
return true;
}
if (GV->getName() == "llvm.global_dtors") {
EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
StringRef Sym(".destructors_used");
OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
MCSA_Reference);
}
return true;
}
return false;
}
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list for which emitUsedDirectiveFor
/// is true, as being used with this directive.
void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
const GlobalValue *GV =
dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
}
}
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
// Sanity check the structors list.
const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (!InitList) return; // Not an array!
StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
!isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
// Gather the structors in a form that's convenient for sorting by priority.
typedef std::pair<unsigned, Constant *> Structor;
SmallVector<Structor, 8> Structors;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
if (!CS) continue; // Malformed.
if (CS->getOperand(1)->isNullValue())
break; // Found a null terminator, skip the rest.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
if (!Priority) continue; // Malformed.
Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
CS->getOperand(1)));
}
// Emit the function pointers in the target-specific order
const DataLayout *DL = TM.getDataLayout();
unsigned Align = Log2_32(DL->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(), less_first());
for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
const MCSection *OutputSection =
(isCtor ?
getObjFileLowering().getStaticCtorSection(Structors[i].first) :
getObjFileLowering().getStaticDtorSection(Structors[i].first));
OutStreamer.SwitchSection(OutputSection);
if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
EmitAlignment(Align);
EmitXXStructor(Structors[i].second);
}
}
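// Example of the priority handling above (hypothetical module): ctor
// entries with priorities {65535, 101} are stable-sorted so the
// priority-101 constructor is emitted first, and each entry lands in the
// section getStaticCtorSection() selects for its priority.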
void AsmPrinter::EmitModuleIdents(Module &M) {
if (!MAI->hasIdentDirective())
return;
if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
assert(N->getNumOperands() == 1 &&
"llvm.ident metadata entry can have only one operand");
const MDString *S = cast<MDString>(N->getOperand(0));
OutStreamer.EmitIdent(S->getString());
}
}
}
//===--------------------------------------------------------------------===//
// Emission and print routines
//
/// EmitInt8 - Emit a byte directive and value.
///
void AsmPrinter::EmitInt8(int Value) const {
OutStreamer.EmitIntValue(Value, 1);
}
/// EmitInt16 - Emit a short directive and value.
///
void AsmPrinter::EmitInt16(int Value) const {
OutStreamer.EmitIntValue(Value, 2);
}
/// EmitInt32 - Emit a long directive and value.
///
void AsmPrinter::EmitInt32(int Value) const {
OutStreamer.EmitIntValue(Value, 4);
}
/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
/// in bytes of the directive is specified by Size and Hi/Lo specify the
/// labels. This implicitly uses .set if it is available.
void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const {
// Get the Hi-Lo expression.
const MCExpr *Diff =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
MCSymbolRefExpr::Create(Lo, OutContext),
OutContext);
if (!MAI->hasSetDirective()) {
OutStreamer.EmitValue(Diff, Size);
return;
}
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
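// Sketch of the emitted assembly (label names are illustrative): with
// .set support this produces
//   .set Lset0, Lhi-Llo
//   .long Lset0
// and without it simply
//   .long Lhi-Llo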
/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
/// where the size in bytes of the directive is specified by Size and Hi/Lo
/// specify the labels. This implicitly uses .set if it is available.
void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
const MCSymbol *Lo, unsigned Size)
const {
// Emit Hi+Offset - Lo
// Get the Hi+Offset expression.
const MCExpr *Plus =
MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
MCConstantExpr::Create(Offset, OutContext),
OutContext);
// Get the Hi+Offset-Lo expression.
const MCExpr *Diff =
MCBinaryExpr::CreateSub(Plus,
MCSymbolRefExpr::Create(Lo, OutContext),
OutContext);
if (!MAI->hasSetDirective())
OutStreamer.EmitValue(Diff, Size);
else {
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
}
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size, bool IsSectionRelative)
const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
// Emit Label+Offset (or just Label if Offset is zero)
const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
if (Offset)
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(Offset, OutContext),
OutContext);
OutStreamer.EmitValue(Expr, Size);
}
//===----------------------------------------------------------------------===//
// EmitAlignment - Emit an alignment directive to the specified power of
// two boundary. For example, if you pass in 3 here, you will get an 8
// byte alignment. If a global value is specified, and if that global has
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
if (getCurrentSection()->getKind().isText())
OutStreamer.EmitCodeAlignment(1 << NumBits);
else
OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
}
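// For example, EmitAlignment(3) requests an 8-byte boundary; in a text
// section this goes through EmitCodeAlignment(8) (so targets can pad with
// nops), otherwise through EmitValueToAlignment(8). The directive actually
// printed (.align, .p2align, ...) is up to the target streamer.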
//===----------------------------------------------------------------------===//
// Constant emission.
//===----------------------------------------------------------------------===//
/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
///
static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
MCContext &Ctx = AP.OutContext;
if (CV->isNullValue() || isa<UndefValue>(CV))
return MCConstantExpr::Create(0, Ctx);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (CE == 0) {
llvm_unreachable("Unknown constant value to lower!");
}
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
if (Constant *C =
ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
if (C != CE)
return lowerConstant(C, AP);
// Otherwise report the problem to the user.
{
std::string S;
raw_string_ostream OS(S);
OS << "Unsupported expression in static initializer: ";
WriteAsOperand(OS, CE, /*PrintType=*/false,
!AP.MF ? 0 : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
const DataLayout &DL = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
if (!OffsetAI)
return Base;
int64_t Offset = OffsetAI.getSExtValue();
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
}
case Instruction::Trunc:
// We emit the value and depend on the assembler to truncate the generated
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
// FALL THROUGH.
case Instruction::BitCast:
return lowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
const DataLayout &DL = *AP.TM.getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
false/*ZExt*/);
return lowerConstant(Op, AP);
}
case Instruction::PtrToInt: {
const DataLayout &DL = *AP.TM.getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
Type *Ty = CE->getType();
const MCExpr *OpExpr = lowerConstant(Op, AP);
// We can emit the pointer value into this slot if the slot is an
// integer slot equal to the size of the pointer.
if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
return OpExpr;
// Otherwise the pointer is smaller than the resultant integer, mask off
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
}
// The MC library also has a right-shift operator, but it isn't consistently
// signed or unsigned between different targets.
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::Shl:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
switch (CE->getOpcode()) {
default: llvm_unreachable("Unknown binary operator constant cast expr");
case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
}
}
}
}
static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
static int isRepeatedByteSequence(const ConstantDataSequential *V) {
StringRef Data = V->getRawDataValues();
assert(!Data.empty() && "Empty aggregates should be CAZ node");
char C = Data[0];
for (unsigned i = 1, e = Data.size(); i != e; ++i)
if (Data[i] != C) return -1;
return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
}
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64) return -1;
uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
// of 2 bit width. This guarantees the constant bit width is
// always a multiple of 8 bits, avoiding issues with padding out
// to Size and other such corner cases.
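// For example, an i16 holding 0x4141 repeats the byte 0x41, while an i24
// is rejected because its width is not a power of two.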
if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1;
uint8_t Byte = static_cast<uint8_t>(Value);
for (unsigned i = 1; i < Size; ++i) {
Value >>= 8;
if (static_cast<uint8_t>(Value) != Byte) return -1;
}
return Byte;
}
if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
// Make sure all array elements are sequences of the same repeated
// byte.
assert(CA->getNumOperands() != 0 && "Should be a CAZ");
int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
if (Byte == -1) return -1;
for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM);
if (ThisByte == -1) return -1;
if (Byte != ThisByte) return -1;
}
return Byte;
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
return isRepeatedByteSequence(CDS);
return -1;
}
static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
AsmPrinter &AP){
// See if we can aggregate this into a .fill; if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value);
}
// If this can be emitted with .ascii/.asciz, emit it as such.
if (CDS->isString())
return AP.OutStreamer.EmitBytes(CDS->getAsString());
// Otherwise, emit the values in successive locations.
unsigned ElementByteSize = CDS->getElementByteSize();
if (isa<IntegerType>(CDS->getElementType())) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
CDS->getElementAsInteger(i));
AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
ElementByteSize);
}
} else if (ElementByteSize == 4) {
// FP Constants are printed as integer constants to avoid losing
// precision.
assert(CDS->getElementType()->isFloatTy());
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
union {
float F;
uint32_t I;
};
F = CDS->getElementAsFloat(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
AP.OutStreamer.EmitIntValue(I, 4);
}
} else {
assert(CDS->getElementType()->isDoubleTy());
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
union {
double F;
uint64_t I;
};
F = CDS->getElementAsDouble(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
AP.OutStreamer.EmitIntValue(I, 8);
}
}
const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
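// The type's alloc size may exceed the sum of the element sizes (it is
// rounded up to the type's alignment), so pad the tail with zeros.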
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer.EmitZeros(Padding);
}
static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, AP.TM);
if (Value != -1) {
uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value);
}
else {
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
emitGlobalConstantImpl(CA->getOperand(i), AP);
}
}
static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AP);
const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer.EmitZeros(Padding);
}
static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
const DataLayout *DL = AP.TM.getDataLayout();
unsigned Size = DL->getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
// Check if padding is needed and insert one or more 0s.
uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
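// PadSize covers both inter-field padding and, for the last field, any
// tail padding up to the struct's total size.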
SizeSoFar += FieldSize + PadSize;
// Now print the actual field value.
emitGlobalConstantImpl(Field, AP);
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
AP.OutStreamer.EmitZeros(PadSize);
}
assert(SizeSoFar == Layout->getSizeInBytes() &&
"Layout of constant struct may be incorrect!");
}
static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
APInt API = CFP->getValueAPF().bitcastToAPInt();
// First print a comment with what we think the original floating-point value
// should have been.
if (AP.isVerbose()) {
SmallString<8> StrVal;
CFP->getValueAPF().toString(StrVal);
CFP->getType()->print(AP.OutStreamer.GetCommentOS());
AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
}
// Now iterate through the APInt chunks, emitting them in endian-correct
// order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
// floats).
unsigned NumBytes = API.getBitWidth() / 8;
unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
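// For an x87 80-bit float, for instance, NumBytes is 10 and TrailingBytes
// is 2: one full 64-bit chunk plus a 16-bit remainder.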
const uint64_t *p = API.getRawData();
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
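// Concretely, for ppc_fp128 on big-endian PPC both sides of the comparison
// are true, so the else branch runs and emits p[0] first.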
if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes);
for (; Chunk >= 0; --Chunk)
AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t));
} else {
unsigned Chunk;
for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t));
if (TrailingBytes)
AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes);
}
// Emit the tail padding for the long double.
const DataLayout &DL = *AP.TM.getDataLayout();
AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
const DataLayout *DL = AP.TM.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit
// width is not a multiple of 64 bits.
APInt Realigned(CI->getValue());
uint64_t ExtraBits = 0;
unsigned ExtraBitsSize = BitWidth & 63;
if (ExtraBitsSize) {
// The bit width of the data is not a multiple of 64 bits.
// The extra bits are expected to be at the end of the chunk in memory.
// Little endian:
// * Nothing to be done, just record the extra bits to emit.
// Big endian:
// * Record the extra bits to emit.
// * Realign the raw data to emit the chunks of 64-bits.
if (DL->isBigEndian()) {
// Basically the structure of the raw data is chunks of 64-bit cells:
//    0        1          BitWidth / 64
// [chunk1][chunk2] ... [chunkN].
// The most significant chunk is chunkN and it should be emitted first.
// However, due to the alignment issue chunkN contains useless bits.
// Realign the chunks so that they contain only useful information:
//    ExtraBits     0        1       (BitWidth / 64) - 1
//  chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
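// For example, an i65 on a big-endian target keeps its low bit in
// ExtraBits, shifts the remaining bits right by one, emits the resulting
// 64-bit chunk, and finally emits the extra bit in the tail bytes below.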
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
Realigned = Realigned.lshr(ExtraBitsSize);
} else
ExtraBits = Realigned.getRawData()[BitWidth / 64];
}
// We don't expect assemblers to support integer data directives
// for more than 64 bits, so we emit the data in at most 64-bit
// quantities at a time.
const uint64_t *RawData = Realigned.getRawData();
for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i];
AP.OutStreamer.EmitIntValue(Val, 8);
}
if (ExtraBitsSize) {
// Emit the extra bits after the 64-bit chunks.
// Emit a directive that fills the expected size.
uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
== ExtraBits && "Directive too small for extra bits.");
AP.OutStreamer.EmitIntValue(ExtraBits, Size);
}
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
const DataLayout *DL = AP.TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(CV->getType());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
switch (Size) {
case 1:
case 2:
case 4:
case 8:
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
CI->getZExtValue());
AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size);
return;
default:
emitGlobalConstantLargeInt(CI, AP);
return;
}
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
return emitGlobalConstantFP(CFP, AP);
if (isa<ConstantPointerNull>(CV)) {
AP.OutStreamer.EmitIntValue(0, Size);
return;
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
return emitGlobalConstantDataSequential(CDS, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
return emitGlobalConstantArray(CVA, AP);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
return emitGlobalConstantStruct(CVS, AP);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
// vectors).
if (CE->getOpcode() == Instruction::BitCast)
return emitGlobalConstantImpl(CE->getOperand(0), AP);
if (Size > 8) {
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
Constant *New = ConstantFoldConstantExpression(CE, DL);
if (New && New != CE)
return emitGlobalConstantImpl(New, AP);
}
}
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return emitGlobalConstantVector(V, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// through the streamer with EmitValue.
AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
OutStreamer.EmitIntValue(0, 1);
}
}
void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
// Target doesn't support this yet!
llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
}
void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
if (Offset > 0)
OS << '+' << Offset;
else if (Offset < 0)
OS << Offset;
}
//===----------------------------------------------------------------------===//
// Symbol Lowering Routines.
//===----------------------------------------------------------------------===//
/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
/// temporary label with the specified stem and unique ID.
MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
Name + Twine(ID));
}
/// GetTempSymbol - Return an assembler temporary label with the specified
/// stem.
MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
Name);
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
return MMI->getAddrLabelSymbol(BA->getBasicBlock());
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
return MMI->getAddrLabelSymbol(BB);
}
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
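/// With the ELF private prefix, for example, this yields a name such as
/// ".LCPI1_0" for entry 0 of function 1.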
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
return OutContext.GetOrCreateSymbol
(Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ "_" + Twine(CPID));
}
/// GetJTISymbol - Return the symbol for the specified jump table entry.
MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
}
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
return OutContext.GetOrCreateSymbol
(Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
Twine(UID) + "_set_" + Twine(MBBID));
}
/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
/// global value name as its base, with the specified suffix, and where the
/// symbol is forced to have private linkage if ForcePrivate is true.
MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
StringRef Suffix,
bool ForcePrivate) const {
SmallString<60> NameStr;
Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
NameStr.append(Suffix.begin(), Suffix.end());
return OutContext.GetOrCreateSymbol(NameStr.str());
}
/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
/// ExternalSymbol.
MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
SmallString<60> NameStr;
Mang->getNameWithPrefix(NameStr, Sym);
return OutContext.GetOrCreateSymbol(NameStr.str());
}
/// PrintParentLoopComment - Print comments about parent loops of this one.
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
if (Loop == 0) return;
PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
OS.indent(Loop->getLoopDepth()*2)
<< "Parent Loop BB" << FunctionNumber << "_"
<< Loop->getHeader()->getNumber()
<< " Depth=" << Loop->getLoopDepth() << '\n';
}
/// PrintChildLoopComment - Print comments about child loops within
/// the loop for this basic block, with nesting.
static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
// Add child loop information
for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end(); CL != E; ++CL) {
OS.indent((*CL)->getLoopDepth()*2)
<< "Child Loop BB" << FunctionNumber << "_"
<< (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
<< '\n';
PrintChildLoopComment(OS, *CL, FunctionNumber);
}
}
/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
const MachineLoopInfo *LI,
const AsmPrinter &AP) {
// Add loop depth information
const MachineLoop *Loop = LI->getLoopFor(&MBB);
if (Loop == 0) return;
MachineBasicBlock *Header = Loop->getHeader();
assert(Header && "No header for loop");
// If this block is not a loop header, just print out which block is the
// loop header and return.
if (Header != &MBB) {
AP.OutStreamer.AddComment(" in Loop: Header=BB" +
Twine(AP.getFunctionNumber())+"_" +
Twine(Loop->getHeader()->getNumber())+
" Depth="+Twine(Loop->getLoopDepth()));
return;
}
// Otherwise, it is a loop header. Print out information about child and
// parent loops.
raw_ostream &OS = AP.OutStreamer.GetCommentOS();
PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
OS << "=>";
OS.indent(Loop->getLoopDepth()*2-2);
OS << "This ";
if (Loop->empty())
OS << "Inner ";
OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
// the references were generated.
if (MBB->hasAddressTaken()) {
const BasicBlock *BB = MBB->getBasicBlock();
if (isVerbose())
OutStreamer.AddComment("Block address taken");
std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
for (unsigned i = 0, e = Syms.size(); i != e; ++i)
OutStreamer.EmitLabel(Syms[i]);
}
// Print some verbose block comments.
if (isVerbose()) {
if (const BasicBlock *BB = MBB->getBasicBlock())
if (BB->hasName())
OutStreamer.AddComment("%" + BB->getName());
emitBasicBlockLoopComments(*MBB, LI, *this);
}
// Print the main label for the block.
if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
Twine(MBB->getNumber()) + ":");
}
} else {
OutStreamer.EmitLabel(MBB->getSymbol());
}
}
void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
bool IsDefinition) const {
MCSymbolAttr Attr = MCSA_Invalid;
switch (Visibility) {
default: break;
case GlobalValue::HiddenVisibility:
if (IsDefinition)
Attr = MAI->getHiddenVisibilityAttr();
else
Attr = MAI->getHiddenDeclarationVisibilityAttr();
break;
case GlobalValue::ProtectedVisibility:
Attr = MAI->getProtectedVisibilityAttr();
break;
}
if (Attr != MCSA_Invalid)
OutStreamer.EmitSymbolAttribute(Sym, Attr);
}
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
/// the predecessor and this block is a fall-through.
bool AsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
if (MBB->isLandingPad() || MBB->pred_empty())
return false;
// If there isn't exactly one predecessor, it can't be a fall through.
MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
++PI2;
if (PI2 != MBB->pred_end())
return false;
// The predecessor has to be immediately before this block.
MachineBasicBlock *Pred = *PI;
if (!Pred->isLayoutSuccessor(MBB))
return false;
// If the block is completely empty, then it definitely does fall through.
if (Pred->empty())
return true;
// Check the terminators in the previous blocks
for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
IE = Pred->end(); II != IE; ++II) {
MachineInstr &MI = *II;
// If it is not a simple branch, we are in a table somewhere.
if (!MI.isBranch() || MI.isIndirectBranch())
return false;
// If we are the operands of one of the branches, this is not a fall
// through. Note that targets with delay slots will usually bundle
// terminators with the delay slot instruction.
for (ConstMIBundleOperands OP(&MI); OP.isValid(); ++OP) {
if (OP->isJTI())
return false;
if (OP->isMBB() && OP->getMBB() == MBB)
return false;
}
}
return true;
}
GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
if (!S->usesMetadata())
return 0;
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
gcp_map_type::iterator GCPI = GCMap.find(S);
if (GCPI != GCMap.end())
return GCPI->second;
const char *Name = S->getName().c_str();
for (GCMetadataPrinterRegistry::iterator
I = GCMetadataPrinterRegistry::begin(),
E = GCMetadataPrinterRegistry::end(); I != E; ++I)
if (strcmp(Name, I->getName()) == 0) {
GCMetadataPrinter *GMP = I->instantiate();
GMP->S = S;
GCMap.insert(std::make_pair(S, GMP));
return GMP;
}
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
}
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 265924)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 265925)
@@ -1,11051 +1,11054 @@
//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
cl::desc("Turn on alias analysis during testing"));
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Include global information in alias analysis"));
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
cl::desc("Bypass the profitability model of load "
"slicing"),
cl::init(false));
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalOperations;
bool LegalTypes;
bool ForCodeSize;
// Worklist of all of the nodes that need to be simplified.
//
// This has the semantics that when adding to the worklist,
// the item added must be next to be processed. It should
// also only appear once. The naive approach to this takes
// linear time.
//
// To reduce the insert/remove time to logarithmic, we use
// a set and a vector to maintain our worklist.
//
// The set contains the items on the worklist, but does not
// maintain the order they should be visited.
//
// The vector maintains the order nodes should be visited, but may
// contain duplicate or removed nodes. When choosing a node to
// visit, we pop off the order stack until we find an item that is
// also in the contents set. All operations are O(log N).
SmallPtrSet<SDNode*, 64> WorkListContents;
SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
/// AddUsersToWorkList - When an instruction is simplified, add all users of
/// the instruction to the work lists because they might get more simplified
/// now.
///
void AddUsersToWorkList(SDNode *N) {
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI)
AddToWorkList(*UI);
}
/// visit - call the node-specific routine that knows how to fold each
/// particular type of node.
SDValue visit(SDNode *N);
public:
/// AddToWorkList - Add to the work list making sure its instance is at the
/// back (next to be processed.)
void AddToWorkList(SDNode *N) {
WorkListContents.insert(N);
WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
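// Erasing from the contents set is sufficient: stale copies of N left in
// WorkListOrder are skipped when popped, since Run() only visits nodes
// still present in this set.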
WorkListContents.erase(N);
}
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
return CombineTo(N, To, 2, AddTo);
}
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
/// SimplifyDemandedBits - Check the specified integer node value to see if
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
bool SliceUpLoad(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
SDValue PromoteIntBinOp(SDValue Op);
SDValue PromoteIntShiftOp(SDValue Op);
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType);
/// combine - call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
// Visitation implementation - Implement dag node combining for different
// node types. The semantics are as follows:
// Return Value:
// SDValue.getNode() == 0 - No change was made
// SDValue.getNode() == N - N was replaced, is dead and has been handled.
// otherwise - N should be replaced by the returned Operand.
//
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
SDValue visitSREM(SDNode *N);
SDValue visitUREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitOR(SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_ROUND_INREG(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
SDValue visitSTORE(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans = true);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases);
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2, bool IsVolatile2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size, bool &IsVolatile,
const Value *&SrcValue, int &SrcValueOffset,
unsigned &SrcValueAlignment,
const MDNode *&TBAAInfo) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
bool MergeConsecutiveStores(StoreSDNode *N);
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
AttributeSet FnAttrs =
DAG.getMachineFunction().getFunction()->getAttributes();
ForCodeSize =
FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize) ||
FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
: TLI.getPointerTy();
}
/// isTypeLegal - This method returns true if we are running before type
/// legalization or if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
/// getSetCCResultType - Convenience wrapper around
/// TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
}
};
}
namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorkListRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
virtual void NodeDeleted(SDNode *N, SDNode *E) {
DC.removeFromWorkList(N);
}
};
}
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorkList(N);
}
void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
((DAGCombiner*)DC)->removeFromWorkList(N);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
/// isNegatibleForFree - Return 1 if we can compute the negated form of the
/// specified expression for the same cost as the expression itself, 2 if we
/// can compute the negated form more cheaply than the expression itself, and
/// 0 if we cannot.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
// Don't allow anything with multiple uses.
if (!Op.hasOneUse()) return 0;
// Don't recurse exponentially.
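// (Both operands may be visited at every level, so capping the depth at 6
// bounds the search to at most 2^6 subexpressions.)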
if (Depth > 6) return 0;
switch (Op.getOpcode()) {
default: return 0;
case ISD::ConstantFP:
// Don't invert constant FP values after legalize. The negated constant
// isn't necessarily legal.
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOperations &&
!TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
return 0;
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
Depth + 1);
}
}
/// GetNegatedExpression - If isNegatibleForFree returns a nonzero value, this
/// function returns the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
// Don't allow anything with multiple uses.
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
return DAG.getConstantFP(V, Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
if (N0CFP->getValueAPF().isZero())
return Op.getOperand(1);
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(0));
case ISD::FMUL:
case ISD::FDIV:
assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1));
case ISD::FP_EXTEND:
case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1));
case ISD::FP_ROUND:
return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
}
}
// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
// that selects between the values 1 and 0, making it equivalent to a setcc.
// Also, set the incoming LHS, RHS, and CC references to the appropriate
// nodes based on the type of node we are checking. This simplifies life a
// bit for the callers.
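// For example, (select_cc lhs, rhs, 1, 0, cc) computes the same value as
// (setcc lhs, rhs, cc), so both forms are matched below.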
static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
if (N.getOpcode() == ISD::SELECT_CC &&
N.getOperand(2).getOpcode() == ISD::Constant &&
N.getOperand(3).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
return false;
}
// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
// one use. If this is true, it allows the users to invert the operation for
// free when it is profitable to do so.
static bool isOneUseSetCC(SDValue N) {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
if (isa<ConstantSDNode>(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
SDValue OpNode =
DAG.FoldConstantArithmetic(Opc, VT,
cast<ConstantSDNode>(N0.getOperand(1)),
cast<ConstantSDNode>(N1));
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
}
if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorkList(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
if (isa<ConstantSDNode>(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
SDValue OpNode =
DAG.FoldConstantArithmetic(Opc, VT,
cast<ConstantSDNode>(N1.getOperand(1)),
cast<ConstantSDNode>(N0));
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
}
if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
N1.getOperand(0), N0);
AddToWorkList(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
}
}
return SDValue();
}
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.1 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
dbgs() << " and " << NumTo-1 << " other values\n";
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
AddToWorkList(To[i].getNode());
AddUsersToWorkList(To[i].getNode());
}
}
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty()) {
// Nodes can be reintroduced into the worklist. Make sure we do not
// process a node that has been replaced.
removeFromWorkList(N);
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
}
return SDValue(N, 0);
}
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorkList(TLO.New.getNode());
AddUsersToWorkList(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (TLO.Old.getNode()->use_empty()) {
removeFromWorkList(TLO.Old.getNode());
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
DAG.DeleteNode(TLO.Old.getNode());
}
}
/// SimplifyDemandedBits - Check the specified integer node value to see if
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
return false;
// Revisit the node.
AddToWorkList(Op.getNode());
// Replace the old value with the new one.
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDLoc dl(Load);
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
DEBUG(dbgs() << "\nReplacing.9 ";
Load->dump(&DAG);
dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
removeFromWorkList(Load);
DAG.DeleteNode(Load);
AddToWorkList(Trunc.getNode());
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
SDLoc dl(Op);
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
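// Promote a plain load to a zero-extending load when that is legal for the
// memory type, otherwise to an any-extending load; loads that already
// extend keep their extension kind.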
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
unsigned Opc = Op.getOpcode();
switch (Opc) {
default: break;
case ISD::AssertSext:
return DAG.getNode(ISD::AssertSext, dl, PVT,
SExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::AssertZext:
return DAG.getNode(ISD::AssertZext, dl, PVT,
ZExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (NewOp.getNode() == 0)
return SDValue();
AddToWorkList(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (NewOp.getNode() == 0)
return SDValue();
AddToWorkList(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
/// PromoteIntBinOp - Promote the specified integer binary operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
if (NN0.getNode() == 0)
return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
SDValue NN1;
if (N0 == N1)
NN1 = NN0;
else {
NN1 = PromoteOperand(N1, PVT, Replace1);
if (NN1.getNode() == 0)
return SDValue();
}
AddToWorkList(NN0.getNode());
if (NN1.getNode())
AddToWorkList(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
if (Replace1)
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
SDLoc dl(Op);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, PVT, NN0, NN1));
}
return SDValue();
}
/// PromoteIntShiftOp - Promote the specified integer shift operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
bool Replace = false;
SDValue N0 = Op.getOperand(0);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
else if (Opc == ISD::SRL)
N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
if (N0.getNode() == 0)
return SDValue();
AddToWorkList(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
SDLoc dl(Op);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
}
return SDValue();
}
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
}
bool DAGCombiner::PromoteLoad(SDValue Op) {
if (!LegalOperations)
return false;
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return false;
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return false;
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
SDLoc dl(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
N->dump(&DAG);
dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
removeFromWorkList(N);
DAG.DeleteNode(N);
AddToWorkList(Result.getNode());
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
// The root of the dag may dangle to deleted nodes until the dag combiner is
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
// While the worklist isn't empty, find a node and try to combine it.
while (!WorkListContents.empty()) {
SDNode *N;
// The WorkListOrder holds the SDNodes in order, but it may contain
// duplicates.
// In order to avoid a linear scan, we use a set (O(log N)) to hold what the
// worklist *should* contain, and check whether the node we want to visit
// should actually be visited.
do {
N = WorkListOrder.pop_back_val();
} while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
if (N->use_empty() && N != &Dummy) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
AddToWorkList(N->getOperand(i).getNode());
DAG.DeleteNode(N);
continue;
}
SDValue RV = combine(N);
if (RV.getNode() == 0)
continue;
++NodesCombined;
// If we get back the same node we passed in, rather than a new node or
// zero, we know that the node must have defined multiple values and
// CombineTo was used. Since CombineTo takes care of the worklist
// mechanics for us, we have no work to do in this case.
if (RV.getNode() == N)
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
dbgs() << '\n');
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
DAG.ReplaceAllUsesWith(N, &OpV);
}
// Push the new node and any users onto the worklist
AddToWorkList(RV.getNode());
AddUsersToWorkList(RV.getNode());
// Add any uses of the old node to the worklist in case this node is the
// last one that uses them. They may become dead after this node is
// deleted.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
AddToWorkList(N->getOperand(i).getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty()) {
// Nodes can be reintroduced into the worklist. Make sure we do not
// process a node that has been replaced.
removeFromWorkList(N);
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
}
}
// If the root changed (e.g. it was a dead load), update the root.
DAG.setRoot(Dummy.getValue());
DAG.RemoveDeadNodes();
}
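// Editor's note (illustrative sketch, not part of the original source): the
// WorkListOrder/WorkListContents pair used in Run() above is a lazy-deletion
// worklist. Removal only erases from the set; stale entries left behind in
// the ordered vector are skipped when popped, along these lines:
//
//   std::vector<SDNode *> Order;   // visit order, may hold stale entries
//   std::set<SDNode *>    Members; // authoritative membership test
//   SDNode *N;
//   do {
//     N = Order.back();
//     Order.pop_back();
//   } while (!Members.erase(N));   // erase() returns 0 for stale entries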
SDValue DAGCombiner::visit(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM: return visitSREM(N);
case ISD::UREM: return visitUREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
case ISD::SDIVREM: return visitSDIVREM(N);
case ISD::UDIVREM: return visitUDIVREM(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
case ISD::STORE: return visitSTORE(N);
case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
}
return SDValue();
}
SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (RV.getNode() == 0) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
}
// If still nothing happened, try promoting the operation.
if (RV.getNode() == 0) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
RV = PromoteIntBinOp(SDValue(N, 0));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
RV = PromoteIntShiftOp(SDValue(N, 0));
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
RV = PromoteExtend(SDValue(N, 0));
break;
case ISD::LOAD:
if (PromoteLoad(SDValue(N, 0)))
RV = SDValue(N, 0);
break;
}
}
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
if (RV.getNode() == 0 &&
SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = { N1, N0 };
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
Ops, 2);
if (CSENode)
return SDValue(CSENode, 0);
}
}
return RV;
}
/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null SDValue.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
return N->getOperand(0);
if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
return N->getOperand(NumOps-1);
for (unsigned i = 1; i < NumOps-1; ++i)
if (N->getOperand(i).getValueType() == MVT::Other)
return N->getOperand(i);
}
return SDValue();
}
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// If N has two operands, where one has an input chain equal to the other,
// the 'other' chain is redundant.
if (N->getNumOperands() == 2) {
if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
return N->getOperand(0);
if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
return N->getOperand(1);
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
// Start out with this token factor.
TFs.push_back(N);
// Iterate through token factors. The TFs list grows when new token factors
// are encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
SDNode *TF = TFs[i];
// Check each of the operands.
for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
SDValue Op = TF->getOperand(i);
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
// redundant.
Changed = true;
break;
case ISD::TokenFactor:
if (Op.hasOneUse() &&
std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
AddToWorkList(Op.getNode());
Changed = true;
break;
}
// Fall thru
default:
// Only add if it isn't already in the list.
if (SeenOps.insert(Op.getNode()))
Ops.push_back(Op);
else
Changed = true;
break;
}
}
}
SDValue Result;
// If we've changed things around, replace the token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
// New and improved token factor.
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, &Ops[0], Ops.size());
}
// Don't add users to work list.
return CombineTo(N, Result, false);
}
return Result;
}
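// Editor's note (hypothetical example, not from the original source): the
// flattening above turns a chain like
//   TokenFactor(TokenFactor(A, B), B, EntryToken)
// into TokenFactor(A, B) -- nested single-use token factors are inlined,
// duplicate operands and entry tokens are dropped, and if nothing remains
// the result collapses to the entry node.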
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorkListRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
// First add the users of this node to the work list so that they
// can be tried again once they have new operands.
AddUsersToWorkList(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
static
SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
SelectionDAG &DAG) {
EVT VT = N0.getValueType();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N00.getOperand(1))) {
// fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
DAG.getNode(ISD::SHL, SDLoc(N00), VT,
N00.getOperand(0), N01),
DAG.getNode(ISD::SHL, SDLoc(N01), VT,
N00.getOperand(1), N01));
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
}
return SDValue();
}
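// Editor's note (worked example, not from the original source): the rewrite
// above is the distribution
//   ((x + c1) << c2) + n1  ==  ((x << c2) + (c1 << c2)) + n1.
// For instance, with x = 3, c1 = 5, c2 = 2:
//   (3 + 5) << 2 = 32  and  (3 << 2) + (5 << 2) = 12 + 20 = 32.
// This exposes the constant c1 << c2 for further folding with n1.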
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N0;
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N1;
}
// fold (add x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() +
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(N1C->getAPIntValue()+
N0C->getAPIntValue(), VT),
N0.getOperand(1));
// reassociate add
SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
if (RADD.getNode() != 0)
return RADD;
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
N1.getOperand(0).getOperand(0), N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
}
}
// fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
if (Result.getNode()) return Result;
}
if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
if (Result.getNode()) return Result;
}
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL &&
N1.getOperand(0).getOpcode() == ISD::SUB)
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
if (C->getAPIntValue() == 0)
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL &&
N0.getOperand(0).getOpcode() == ISD::SUB)
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
if (C->getAPIntValue() == 0)
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
unsigned DestBits = VT.getScalarType().getSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
}
// add (sext i1), X -> sub X, (zext i1)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueType() == MVT::i1 &&
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
return SDValue();
}
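// Editor's note (worked example, not from the original source): the
// add -> or rewrite in visitADD relies on disjoint known bits making carries
// impossible. E.g. if N0 can only set bits in 0x00F0 and N1 only in 0x000F:
//   0x00F0 + 0x000F = 0x00FF = 0x00F0 | 0x000F
// since no bit position ever receives two ones.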
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
}
return SDValue();
}
SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
return DAG.getConstant(0, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return DAG.getConstant(0, VT);
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N0;
}
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
if (N1C)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), VT));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
VT);
return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(1));
// fold ((A+(C+B))-B) -> A+C
if (N0.getOpcode() == ISD::ADD &&
N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// fold ((A-(B-C))-C) -> A-B
if (N0.getOpcode() == ISD::SUB &&
N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
VT);
}
return SDValue();
}
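// Editor's note (worked example, not from the original source): the
// fold C2-(A+C1) -> (C2-C1)-A above is plain reassociation; e.g. with
// C2 = 10 and C1 = 3:
//   10 - (A + 3) = (10 - 3) - A = 7 - A
// which leaves a single constant to materialize instead of two.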
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into a SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, VT),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (N0C && N0C->isAllOnesValue())
return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
return SDValue();
}
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (sube x, y, false) -> (subc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
if (!C)
return false;
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
EVT EltVT = N->getValueType(0).getVectorElementType();
return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
EltVT.getSizeInBits() >= SplatBitSize);
}
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
bool N0IsConst = false;
bool N1IsConst = false;
APInt ConstValue0, ConstValue1;
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
: APInt();
N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
: APInt();
}
// fold (mul c1, c2) -> c1*c2
if (N0IsConst && N1IsConst)
return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
// canonicalize constant to RHS
if (N0IsConst && !N1IsConst)
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
return N1;
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(ConstValue1.logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT),
DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(Log2Val,
getShiftAmountTy(N0.getValueType()))));
}
APInt Val;
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N1IsConst && N0.getOpcode() == ISD::SHL &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
isa<ConstantSDNode>(N0.getOperand(1)))) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1, N0.getOperand(1));
AddToWorkList(C3.getNode());
return DAG.getNode(ISD::MUL, SDLoc(N), VT,
N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(0,0), Y(0,0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
isa<ConstantSDNode>(N0.getOperand(1))) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
Sh.getOperand(0), Y);
return DAG.getNode(ISD::SHL, SDLoc(N), VT,
Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
isa<ConstantSDNode>(N0.getOperand(1))))
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
DAG.getNode(ISD::MUL, SDLoc(N0), VT,
N0.getOperand(0), N1),
DAG.getNode(ISD::MUL, SDLoc(N1), VT,
N0.getOperand(1), N1));
// reassociate mul
SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
if (RMUL.getNode() != 0)
return RMUL;
return SDValue();
}
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (sdiv c1, c2) -> c1/c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
if (N1C && !N1C->isNullValue() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
return SDValue();
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
DAG.getConstant(VT.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
AddToWorkList(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
DAG.getConstant(VT.getSizeInBits() - lg2,
getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
AddToWorkList(SRL.getNode());
AddToWorkList(ADD.getNode()); // Divide by pow2
SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorkList(SRA.getNode());
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), SRA);
}
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
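// Editor's note (worked example, not from the original source): the
// power-of-two sdiv expansion above rounds toward zero by adding
// (divisor - 1) to negative dividends before the arithmetic shift.
// For i32 X = -7 divided by 4 (lg2 = 2):
//   SGN = -7 >>s 31        = 0xFFFFFFFF
//   SRL = SGN >>u (32 - 2) = 3            (= divisor - 1)
//   ADD = -7 + 3           = -4
//   SRA = -4 >>s 2         = -1           (matches C's -7 / 4)
// whereas a bare -7 >>s 2 would give -2, rounding toward minus infinity.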
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (udiv c1, c2) -> c1/c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
if (N1C && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
ADDVT));
AddToWorkList(Add.getNode());
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
}
}
}
// fold (udiv x, c) -> alternate
if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildUDIV(N);
if (Op.getNode()) return Op;
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
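// Editor's note (worked example, not from the original source): the
// (udiv x, (shl c, y)) fold above is log2 arithmetic: dividing by
// 8 << y is dividing by 2^(3+y), so
//   x /u (8 << y)  ==  x >>u (3 + y)
// e.g. 96 /u (8 << 1) = 96 / 16 = 6 = 96 >>u 4.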
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
}
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
AddToWorkList(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
AddToWorkList(Mul.getNode());
return Sub;
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
DAG.getConstant(N1C->getAPIntValue()-1,VT));
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
SDValue Add =
DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
VT));
AddToWorkList(Add.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
}
}
}
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
AddToWorkList(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
AddToWorkList(Mul.getNode());
return Sub;
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
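// Editor's note (worked example, not from the original source): the
// power-of-two urem folds above rewrite the remainder as a mask:
//   x %u 16        ==  x & 15
//   x %u (8 << y)  ==  x & ((8 << y) - 1)
// where the -1 is built by adding the all-ones constant to the shift.
// Concretely, 29 %u 16 = 13 = 0b11101 & 0b01111.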
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
return N1;
// fold (mulhu x, 1) -> 0
if (N1C && N1C->getAPIntValue() == 1)
return DAG.getConstant(0, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
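// Editor's note (worked example, not from the original source): the
// widening rewrite above computes mulhu via a double-width multiply.
// For i16 operands a = b = 0xFFFF, zero-extended to i32:
//   0x0000FFFF * 0x0000FFFF = 0xFFFE0001
//   0xFFFE0001 >>u 16       = 0xFFFE
// and truncating back to i16 yields mulhu(a, b) = 0xFFFE.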
/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
/// compute two values. LoOp and HiOp give the opcodes for the two computations
/// that are being performed. Returns the combined value if a simplification
/// was made, or a null SDValue otherwise.
///
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
if (!HiExists &&
(!LegalOperations ||
TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
N->op_begin(), N->getNumOperands());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
if (!LoExists &&
(!LegalOperations ||
TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
N->op_begin(), N->getNumOperands());
return CombineTo(N, Res, Res);
}
// If both halves are used, return as it is.
if (LoExists && HiExists)
return SDValue();
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
N->op_begin(), N->getNumOperands());
AddToWorkList(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
if (HiExists) {
SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
N->op_begin(), N->getNumOperands());
AddToWorkList(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
return SDValue();
}
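// Editor's note (hypothetical example, not from the original source): for a
// two-result node such as (smul_lohi a, b), if only result 0 (the low half)
// has uses, the helper above rebuilds it as a plain (mul a, b); if only
// result 1 is used it becomes (mulhs a, b). Each split-off half is also run
// through combine() in case it simplifies further on its own.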
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
if (Res.getNode()) return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
// If the type twice as wide is legal, transform the smul_lohi to a wider
// multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part (result value 1).
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part (result value 0).
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
if (Res.getNode()) return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
// If the type twice as wide is legal, transform the umul_lohi to a wider
// multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part (result value 1).
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part (result value 0).
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSMULO(SDNode *N) {
// (smulo x, 2) -> (saddo x, x)
if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
N->getOperand(0), N->getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitUMULO(SDNode *N) {
// (umulo x, 2) -> (uaddo x, x)
if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
N->getOperand(0), N->getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
if (Res.getNode()) return Res;
return SDValue();
}
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
if (Res.getNode()) return Res;
return SDValue();
}
/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
if (N0.getNode()->getNumOperands() == 0) return SDValue();
// For each of OP in AND/OR/XOR:
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
//
// Do not sink a logical op inside a vector extend, since it may combine
// into a vsetcc.
EVT Op0VT = N0.getOperand(0).getValueType();
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
(N0.getOpcode() == ISD::TRUNCATE &&
(!TLI.isZExtFree(VT, Op0VT) ||
!TLI.isTruncateFree(Op0VT, VT)) &&
TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
(!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
AddToWorkList(ORNode.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
}
// For each of OP in SHL/SRL/SRA/AND...
// fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
// fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
// fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
AddToWorkList(ORNode.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
ORNode, N0.getOperand(1));
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization after type legalization and before
// LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST ||
N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
EVT In1Ty = In1.getValueType();
SDLoc DL(N);
// If both incoming values are integers, and the original types are the
// same.
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
AddToWorkList(Op.getNode());
return BC;
}
}
// Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
// Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
// If both shuffles use the same mask, and both shuffle within a single
// vector, then it is worthwhile to move the swizzle after the operation.
// The type-legalizer generates this pattern when loading illegal
// vector types from memory. In many cases this allows additional shuffle
// optimizations.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N0.getOperand(1).getOpcode() == ISD::UNDEF &&
N1.getOperand(1).getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
"Inputs to shuffles are not the same type");
unsigned NumElts = VT.getVectorNumElements();
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
bool SameMask = true;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx0 = SVN0->getMaskElt(i);
int Idx1 = SVN1->getMaskElt(i);
if (Idx0 != Idx1) {
SameMask = false;
break;
}
}
if (SameMask) {
SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0.getOperand(0), N1.getOperand(0));
AddToWorkList(Op.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), Op,
DAG.getUNDEF(VT), &SVN0->getMask()[0]);
}
}
return SDValue();
}
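// Editor's note (hypothetical examples, not from the original source): two
// shapes the helper above handles, matching the fold patterns named in its
// comments:
//   (and (zext i8 x to i32), (zext i8 y to i32)) -> (zext (and x, y))
//     -- the logical op is narrowed to i8 and extended once.
//   (or (shl x, z), (shl y, z)) -> (shl (or x, y), z)
//     -- the shared shift amount is hoisted past the logical op.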
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue LL, LR, RL, RR, CC0, CC1;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N0;
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
return N1;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N0;
}
// fold (and x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (N1C && N1C->isAllOnesValue())
return N0;
// if (and x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, VT);
// reassociate and
SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
if (RAND.getNode() != 0)
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
Mask = Mask.trunc(N0Op0.getValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
N0.getValueType(), N0Op0);
// Replace uses of the AND with uses of the Zero extend node.
CombineTo(N, Zext);
// We actually want to replace all uses of the any_extend with the
// zero_extend, to avoid duplicating things. This will later cause this
// AND to be folded.
CombineTo(N0.getNode(), Zext);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
// (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
// already be zero by virtue of the width of the base type of the load.
//
// the 'X' node here can either be nothing or an extract_vector_elt to catch
// more cases.
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
N0.getOperand(0).getOpcode() == ISD::LOAD) ||
N0.getOpcode() == ISD::LOAD) {
LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
N0 : N0.getOperand(0) );
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
APInt Constant = APInt::getNullValue(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
SplatBitSize, HasAnyUndefs);
if (IsSplat) {
// Undef bits can contribute to a possible optimisation if set, so
// set them.
SplatValue |= SplatUndef;
// The splat value may be something like "0x00FFFFFF", which means 0 for
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
EVT VT = Vector->getValueType(0);
unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
// the lane size.
if (BitWidth > SplatBitSize)
for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
SplatBitSize < BitWidth;
SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
Constant = APInt::getAllOnesValue(BitWidth);
for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
}
}
// If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
// actually legal and isn't going to get expanded, else this is a false
// optimisation.
bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
Load->getMemoryVT());
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
// unneeded.
Constant =
Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
bool B;
switch (Load->getExtensionType()) {
default: B = false; break;
case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
case ISD::ZEXTLOAD:
case ISD::NON_EXTLOAD: B = true; break;
}
if (B && Constant.isAllOnesValue()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
Load->getChain(), Load->getBasePtr(),
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
if (Load->getNumValues() == 3) {
// PRE/POST_INC loads have 3 values.
SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
NewLoad.getValue(2) };
CombineTo(Load, To, 3, true);
} else {
CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
}
}
// Fold the AND away, taking care not to fold to the old load node if we
// replaced it.
CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
LL.getValueType().isInteger()) {
// fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
AddToWorkList(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
LR.getValueType(), LL, RL);
AddToWorkList(ANDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
}
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
AddToWorkList(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
}
// Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
Op0 == Op1 && LL.getValueType().isInteger() &&
Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
(cast<ConstantSDNode>(LR)->isAllOnesValue() &&
cast<ConstantSDNode>(RR)->isNullValue()))) {
SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
LL, DAG.getConstant(1, LL.getValueType()));
AddToWorkList(ADDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ADDNode,
DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
}
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
Op1 = ISD::getSetCCSwappedOperands(Op1);
std::swap(RL, RR);
}
if (LL == RL && LR == RR) {
bool isInteger = LL.getValueType().isInteger();
ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
TLI.isOperationLegal(ISD::SETCC,
getSetCCResultType(N0.getSimpleValueType())))))
return DAG.getSetCC(SDLoc(N), N0.getValueType(),
LL, LR, Result);
}
}
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
if (N1C && (N0.getOpcode() == ISD::LOAD ||
(N0.getOpcode() == ISD::ANY_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::LOAD))) {
bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
LoadSDNode *LN0 = HasAnyExt
? cast<LoadSDNode>(N0.getOperand(0))
: cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LN0->getMemoryVT();
if (ExtVT == LoadedVT &&
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// Do not change the width of a volatile load.
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
SDValue NewPtr = LN0->getBasePtr();
// For big endian targets, we need to add an offset to the pointer
// to load the correct bytes. For little endian systems, we merely
// need to read fewer bytes from the same pointer.
if (TLI.isBigEndian()) {
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
NewPtr, DAG.getConstant(PtrOff, PtrType));
Alignment = MinAlign(Alignment, PtrOff);
}
AddToWorkList(NewPtr.getNode());
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), NewPtr,
LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
Alignment, LN0->getTBAAInfo());
AddToWorkList(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
}
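// An illustrative run of the narrowing path above (operands assumed): for
// (and (load i32 [p]), 255), ActiveBits is 8, so ExtVT = i8 and the load
// shrinks to (zextload i8 -> i32). Big-endian targets advance the pointer by
// PtrOff = 4 - 1 = 3 so the same low-order byte is read; little-endian
// targets just read fewer bytes at [p].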
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
APInt ADDC = ADDI->getAPIntValue();
if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
// immediate for an add, but it is legal if its top c2 bits are set,
// transform the ADD so the immediate doesn't need to be materialized
// in a register.
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
SRLI->getZExtValue());
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
SDValue NewAdd =
DAG.getNode(ISD::ADD, SDLoc(N0), VT,
N0.getOperand(0), DAG.getConstant(ADDC, VT));
CombineTo(N0.getNode(), NewAdd);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
}
}
}
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false);
if (BSwap.getNode())
return BSwap;
}
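// Worked example for the 0xffff fold above (i32, x with bits 23:16 known
// zero, e.g. x = 0xAA00CCDD): (or (srl x, 8), (shl x, 8)) = 0x00EEDDCC, so
// the and'ed result is 0x0000DDCC, and (srl (bswap x), 16) =
// (srl 0xDDCC00AA, 16) = 0x0000DDCC as well: the low halfword is x's low
// halfword byte-swapped.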
return SDValue();
}
/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits) {
if (!LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
return SDValue();
if (!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
// Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
std::swap(N0, N1);
if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() == ISD::AND) {
if (!N0.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N01C || N01C->getZExtValue() != 0xFF00)
return SDValue();
N0 = N0.getOperand(0);
LookPassAnd0 = true;
}
if (N1.getOpcode() == ISD::AND) {
if (!N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C || N11C->getZExtValue() != 0xFF)
return SDValue();
N1 = N1.getOperand(0);
LookPassAnd1 = true;
}
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.getNode()->hasOneUse() ||
!N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N01C || !N11C)
return SDValue();
if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
return SDValue();
// Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
SDValue N00 = N0->getOperand(0);
if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
if (!N00.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
if (!N001C || N001C->getZExtValue() != 0xFF)
return SDValue();
N00 = N00.getOperand(0);
LookPassAnd0 = true;
}
SDValue N10 = N1->getOperand(0);
if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
if (!N10.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
if (!N101C || N101C->getZExtValue() != 0xFF00)
return SDValue();
N10 = N10.getOperand(0);
LookPassAnd1 = true;
}
if (N00 != N10)
return SDValue();
// Make sure everything beyond the low halfword gets set to zero since the SRL
// 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
if (OpSizeInBits > 16) {
// If the left-shift isn't masked out then the only way this is a bswap is
// if all bits beyond the low 8 are 0. In that case the entire pattern
// reduces to a left shift anyway: leave it for other parts of the combiner.
if (DemandHighBits && !LookPassAnd0)
return SDValue();
// However, if the right shift isn't masked out then it might be because
// it's not needed. See if we can spot that too. Note that even when the
// high bits aren't demanded, bits 23:16 of N10 must be known zero: an
// unmasked srl feeds them into bits 15:8 of the result, which are demanded
// either way.
if (!LookPassAnd1) {
unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
if (!DAG.MaskedValueIsZero(
N10, APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
return SDValue();
}
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
if (OpSizeInBits > 16)
Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
return Res;
}
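// Worked example for the matcher above (i32, BSWAP legal, x assumed): for
// (or (and (srl x, 8), 0xff), (and (shl x, 8), 0xff00)) with x = 0xAABBCCDD,
// both AND layers are looked through, N00 == N10 == x, and the returned
// (srl (bswap x), 16) = (srl 0xDDCCBBAA, 16) = 0x0000DDCC matches the
// pattern's value 0xDD00 | 0xCC.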
/// isBSwapHWordElement - Return true if the specified node is an element
/// that makes up a 32-bit packed halfword byteswap. i.e.
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
if (!N.getNode()->hasOneUse())
return false;
unsigned Opc = N.getOpcode();
if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
return false;
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!N1C)
return false;
unsigned Num;
switch (N1C->getZExtValue()) {
default:
return false;
case 0xFF: Num = 0; break;
case 0xFF00: Num = 1; break;
case 0xFF0000: Num = 2; break;
case 0xFF000000: Num = 3; break;
}
// Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
SDValue N0 = N.getOperand(0);
if (Opc == ISD::AND) {
if (Num == 0 || Num == 2) {
// (x >> 8) & 0xff
// (x >> 8) & 0xff0000
if (N0.getOpcode() != ISD::SRL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
} else {
// (x << 8) & 0xff00
// (x << 8) & 0xff000000
if (N0.getOpcode() != ISD::SHL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
} else if (Opc == ISD::SHL) {
// (x & 0xff) << 8
// (x & 0xff0000) << 8
if (Num != 0 && Num != 2)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
} else { // Opc == ISD::SRL
// (x & 0xff00) >> 8
// (x & 0xff000000) >> 8
if (Num != 1 && Num != 3)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
if (Parts[Num])
return false;
Parts[Num] = N0.getOperand(0).getNode();
return true;
}
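// For reference, the AND-rooted forms accepted above and the Parts slot each
// one fills (x is the shared source):
//   ((x >> 8) & 0xff)       -> Parts[0]
//   ((x << 8) & 0xff00)     -> Parts[1]
//   ((x >> 8) & 0xff0000)   -> Parts[2]
//   ((x << 8) & 0xff000000) -> Parts[3]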
/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
if (!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
// Look for either
// (or (or (and), (and)), (or (and), (and)))
// (or (or (or (and), (and)), (and)), (and))
if (N0.getOpcode() != ISD::OR)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
if (N1.getOpcode() == ISD::OR &&
N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
// (or (or (and), (and)), (or (and), (and)))
SDValue N000 = N00.getOperand(0);
if (!isBSwapHWordElement(N000, Parts))
return SDValue();
SDValue N001 = N00.getOperand(1);
if (!isBSwapHWordElement(N001, Parts))
return SDValue();
SDValue N010 = N01.getOperand(0);
if (!isBSwapHWordElement(N010, Parts))
return SDValue();
SDValue N011 = N01.getOperand(1);
if (!isBSwapHWordElement(N011, Parts))
return SDValue();
} else {
// (or (or (or (and), (and)), (and)), (and))
if (!isBSwapHWordElement(N1, Parts))
return SDValue();
if (!isBSwapHWordElement(N01, Parts))
return SDValue();
if (N00.getOpcode() != ISD::OR)
return SDValue();
SDValue N000 = N00.getOperand(0);
if (!isBSwapHWordElement(N000, Parts))
return SDValue();
SDValue N001 = N00.getOperand(1);
if (!isBSwapHWordElement(N001, Parts))
return SDValue();
}
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
SDValue(Parts[0],0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, SDLoc(N), VT,
DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
}
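// Worked example (x = 0xAABBCCDD assumed): the matched expression
// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
// evaluates to 0xBBAADDCC, and (rotl (bswap x), 16) =
// (rotl 0xDDCCBBAA, 16) = 0xBBAADDCC too, so each of the ROTL, ROTR and
// shl/srl/or lowerings above is value preserving.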
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue LL, LR, RL, RR, CC0, CC1;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
// fold (or x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N1;
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N0;
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
return N0;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N1;
}
// fold (or x, undef) -> -1
if (!LegalOperations &&
(N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
// fold (or c1, c2) -> c1|c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (or x, -1) -> -1
if (N1C && N1C->isAllOnesValue())
return N1;
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
if (BSwap.getNode() != 0)
return BSwap;
BSwap = MatchBSwapHWordLow(N, N0, N1);
if (BSwap.getNode() != 0)
return BSwap;
// reassociate or
SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
if (ROR.getNode() != 0)
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) == 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1),
DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
LL.getValueType().isInteger()) {
// fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
// fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
if (cast<ConstantSDNode>(LR)->isNullValue() &&
(Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
LR.getValueType(), LL, RL);
AddToWorkList(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
// fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
(Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
LR.getValueType(), LL, RL);
AddToWorkList(ANDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
}
}
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
Op1 = ISD::getSetCCSwappedOperands(Op1);
std::swap(RL, RR);
}
if (LL == RL && LR == RR) {
bool isInteger = LL.getValueType().isInteger();
ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
TLI.isOperationLegal(ISD::SETCC,
getSetCCResultType(N0.getValueType())))))
return DAG.getSetCC(SDLoc(N), N0.getValueType(),
LL, LR, Result);
}
}
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND &&
N1.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
N1.getOperand(1).getOpcode() == ISD::Constant &&
// Don't increase # computations.
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
// We can only do this xform if we know that bits from X that are set in C2
// but not in C1 are already zero. Likewise for Y.
const APInt &LHSMask =
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
const APInt &RHSMask =
cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
DAG.getConstant(LHSMask | RHSMask, VT));
}
}
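// A concrete instance of the mask-merge fold above (masks assumed): with
// C1 = 0xff00 and C2 = 0x00ff, if bits 7:0 of X and bits 15:8 of Y are
// known zero, (or (and X, 0xff00), (and Y, 0x00ff)) becomes
// (and (or X, Y), 0xffff), replacing two ANDs and an OR with one of each.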
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
if (isa<ConstantSDNode>(Op.getOperand(1))) {
Mask = Op.getOperand(1);
Op = Op.getOperand(0);
} else {
return false;
}
}
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
Shift = Op;
return true;
}
return false;
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// Must be a legal type. Expanded and promoted types won't work with rotates.
EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT)) return 0;
// The target must have at least one rotate flavor.
bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
if (!HasROTL && !HasROTR) return 0;
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
return 0; // Not part of a rotate.
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
return 0; // Not part of a rotate.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
return 0; // Not shifting the same value.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return 0; // Shifts must disagree.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
std::swap(LHSMask, RHSMask);
}
unsigned OpSizeInBits = VT.getSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
if (LHSShiftAmt.getOpcode() == ISD::Constant &&
RHSShiftAmt.getOpcode() == ISD::Constant) {
uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
if ((LShVal + RShVal) != OpSizeInBits)
return 0;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
if (LHSMask.getNode()) {
APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
}
if (RHSMask.getNode()) {
APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
}
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
}
return Rot.getNode();
}
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
return 0;
// If the shift amount is sign/zext/any-extended or truncated, just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
SDValue RExtOp0 = RHSShiftAmt;
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
(RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
LExtOp0 = LHSShiftAmt.getOperand(0);
RExtOp0 = RHSShiftAmt.getOperand(0);
}
if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y)
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotr x, (sub 32, y))
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
if (SUBC->getAPIntValue() == OpSizeInBits)
return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
} else if (LExtOp0.getOpcode() == ISD::SUB &&
RExtOp0 == LExtOp0.getOperand(1)) {
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
// (rotr x, y)
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
// (rotl x, (sub 32, y))
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
if (SUBC->getAPIntValue() == OpSizeInBits)
return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
return 0;
}
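// Two illustrative matches (i32, a rotate legal, operands assumed):
//   (or (shl x, 8), (srl x, 24))          -> (rotl x, 8), since 8 + 24 == 32
//   (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
// The variable form is only recognized when the SUB's constant equals
// OpSizeInBits, per the SUBC checks above.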
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue LHS, RHS, CC;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N1;
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N0;
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (xor x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// reassociate xor
SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
if (RXOR.getNode() != 0)
return RXOR;
// fold !(x cc y) -> (x !cc y)
if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
isInt);
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0.getOpcode()) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
}
}
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getNode()->hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
DAG.getConstant(1, V.getValueType()));
AddToWorkList(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
}
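// A small worked instance of the i1 fold above (operands assumed):
// (xor (or (setcc a, b, cc), c), 1) becomes
// (and (xor (setcc a, b, cc), 1), (xor c, 1)), i.e. !(p | q) == !p & !q,
// letting the setcc-inversion fold fire on the inner xor.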
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (N1C && N1C->isAllOnesValue() &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
}
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
N0->getOperand(1) == N1) {
SDValue X = N0->getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
AddToWorkList(NotX.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
}
// fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
if (N1C && N0.getOpcode() == ISD::XOR) {
ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (N00C)
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
DAG.getConstant(N1C->getAPIntValue() ^
N00C->getAPIntValue(), VT));
if (N01C)
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(N1C->getAPIntValue() ^
N01C->getAPIntValue(), VT));
}
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
// Simplify the expression using non-local knowledge.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
bool HighBitSet = false; // Can we transform this if the high bit is set?
switch (LHS->getOpcode()) {
default: return SDValue();
case ISD::OR:
case ISD::XOR:
HighBitSet = false; // We can only transform sra if the high bit is clear.
break;
case ISD::AND:
HighBitSet = true; // We can only transform sra if the high bit is set.
break;
case ISD::ADD:
if (N->getOpcode() != ISD::SHL)
return SDValue(); // only shl(add) not sr[al](add).
HighBitSet = false; // We can only transform sra if the high bit is clear.
break;
}
// We require the RHS of the binop to be a constant as well.
ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
if (!BinOpCst) return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant.
// If it is not a shift, it pessimizes some common cases like:
//
// void foo(int *X, int i) { X[i & 1235] = 1; }
// int bar(int *X, int i) { return X[i & 255]; }
SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
BinOpLHSVal->getOpcode() != ISD::SRA &&
BinOpLHSVal->getOpcode() != ISD::SRL) ||
!isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
return SDValue();
EVT VT = N->getValueType(0);
// If this is a signed shift right, and the high bit is modified by the
// logical operation, do not perform the transformation. The HighBitSet
// boolean indicates the value of the high bit of the constant which would
// cause it to be modified for this operation.
if (N->getOpcode() == ISD::SRA) {
bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
if (BinOpRHSSignSet != HighBitSet)
return SDValue();
}
// Fold the constants, shifting the binop RHS by the shift amount.
SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
N->getValueType(0),
LHS->getOperand(1), N->getOperand(1));
// Create the new shift.
SDValue NewShift = DAG.getNode(N->getOpcode(),
SDLoc(LHS->getOperand(0)),
VT, LHS->getOperand(0), N->getOperand(1));
// Create the new binop.
return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
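// Worked example (constants assumed, and note the shift-fed restriction in
// the FIXME above): (shl (add (srl y, 3), 5), 2) is rebuilt as
// (add (shl (srl y, 3), 2), 20): NewRHS folds to 5 << 2 = 20 and the add is
// pulled outside the shift, as the canonicalization comment describes.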
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (shl c1, c2) -> c1<<c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
// fold (shl 0, x) -> 0
if (N0C && N0C->isNullValue())
return N0;
// fold (shl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
return DAG.getUNDEF(VT);
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (shl undef, x) -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND &&
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
DAG.getNode(ISD::TRUNCATE,
SDLoc(N),
TruncVT, N100),
DAG.getConstant(TruncC, TruncVT)));
}
}
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SHL &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
// fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
// For this to be valid, the second form must not preserve any of the bits
// that are shifted out by the inner shift in the first form. This means
// the outer shift size must be >= the number of bits added by the ext.
// As a corollary, we don't care what kind of ext it is.
if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND) &&
N0.getOperand(0).getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
uint64_t c1 =
cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = N0.getOperand(0).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
if (c2 >= OpSizeInBits - InnerShiftSize) {
if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
N0.getOperand(0)->getOperand(0)),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
}
// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
uint64_t c1 =
cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
uint64_t c2 = N1C->getZExtValue();
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
EVT CountVT = NewOp0.getOperand(1).getValueType();
SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
NewOp0, DAG.getConstant(c2, CountVT));
AddToWorkList(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
}
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
//                               (and (srl x, (sub c1, c2)), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
uint64_t c2 = N1C->getZExtValue();
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - c1);
SDValue Shift;
if (c2 > c1) {
Mask = Mask.shl(c2-c1);
Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(c2-c1, N1.getValueType()));
} else {
Mask = Mask.lshr(c1-c2);
Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(c1-c2, N1.getValueType()));
}
return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
DAG.getConstant(Mask, VT));
}
}
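// Worked example (i32 assumed): for (shl (srl x, 4), 6), c2 > c1, so the
// result is (and (shl x, 2), 0xFFFFFFC0): the net shift is c2 - c1 = 2 and
// the AND mask zeroes the six low bits that the original srl/shl pair
// leaves clear.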
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
SDValue HiBitsMask =
DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() -
N1C->getZExtValue()),
VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
HiBitsMask);
}
if (N1C) {
SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
if (NewSHL.getNode())
return NewSHL;
}
return SDValue();
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (sra c1, c2) -> c1 >>s c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
// fold (sra 0, x) -> 0
if (N0C && N0C->isNullValue())
return N0;
// fold (sra -1, x) -> -1
if (N0C && N0C->isAllOnesValue())
return N0;
// fold (sra x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 when the target
// supports sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if (VT.isVector())
ExtVT = EVT::getVectorVT(*DAG.getContext(),
ExtVT, VT.getVectorNumElements());
if ((!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
}
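// Worked example (i32, SIGN_EXTEND_INREG legal or pre-legalize assumed):
// (sra (shl x, 24), 24) keeps LowBits = 32 - 24 = 8, so it becomes
// (sign_extend_inreg x, i8), sign-extending x's low byte in place.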
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(Sum, N1C->getValueType(0)));
}
}
// fold (sra (shl X, m), (sub result_size, n))
// -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
// result_size - n != m.
// If truncate is free for the target, sext(shl) is likely to result in
// better code.
if (N0.getOpcode() == ISD::SHL) {
// Get the two constants of the shifts, CN0 = m, CN = n.
const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT =
EVT::getIntegerVT(*DAG.getContext(),
OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
// If the shift is not a no-op (in which case this should be just a sign
// extend already), the truncated-to type is legal, sign_extend is legal
// on that type, and the truncate to that type is both legal and free,
// perform the transform.
if ((ShiftAmt > 0) &&
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
SDValue Amt = DAG.getConstant(ShiftAmt,
getShiftAmountTy(N0.getOperand(0).getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
N0.getOperand(0), Amt);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
Shift);
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
N->getValueType(0), Trunc);
}
}
}
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND &&
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
DAG.getNode(ISD::AND, SDLoc(N),
TruncVT,
DAG.getNode(ISD::TRUNCATE,
SDLoc(N),
TruncVT, N100),
DAG.getConstant(TruncC, TruncVT)));
}
}
// fold (sra (trunc (srl/sra x, c1)), c2) -> (trunc (sra x, c1+c2))
// if c1 is equal to the number of bits the trunc removes
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOperand(1).hasOneUse() &&
N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
EVT LargeVT = N0.getOperand(0).getValueType();
ConstantSDNode *LargeShiftAmt =
cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
LargeShiftAmt->getZExtValue()) {
SDValue Amt =
DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
N0.getOperand(0).getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
}
}
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C) {
SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
if (NewSRA.getNode())
return NewSRA;
}
return SDValue();
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (srl c1, c2) -> c1 >>u c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
// fold (srl 0, x) -> 0
if (N0C && N0C->isNullValue())
return N0;
// fold (srl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
uint64_t c1 =
cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = N0.getOperand(0).getValueType();
EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
// This is only valid if OpSizeInBits + c1 == the size of the inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
N0.getOperand(0)->getOperand(0),
DAG.getConstant(c1 + c2, ShiftCountVT)));
}
}
// fold (srl (shl x, c), c) -> (and x, cst2)
if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
N0.getValueSizeInBits() <= 64) {
uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(~0ULL >> ShAmt, VT));
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
DAG.getConstant(Mask, VT));
}
}
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~KnownZero;
if (UnknownBits == 0) return DAG.getConstant(1, VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
if ((UnknownBits & (UnknownBits - 1)) == 0) {
// Okay, we know that only the single bit specified by UnknownBits
// could be set on input to the CTLZ node. If this bit is set, the SRL
// will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
// to an SRL/XOR pair, which is likely to simplify more.
unsigned ShAmt = UnknownBits.countTrailingZeros();
SDValue Op = N0.getOperand(0);
if (ShAmt) {
Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
AddToWorkList(Op.getNode());
}
return DAG.getNode(ISD::XOR, SDLoc(N), VT,
Op, DAG.getConstant(1, VT));
}
}
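// Worked example of the ctlz fold above (i32, x known to have at most bit 3
// set assumed): for (srl (ctlz x), 5), UnknownBits = 0b1000 and ShAmt = 3,
// so the pair becomes (xor (srl x, 3), 1). If bit 3 is set, ctlz = 28 and
// 28 >> 5 = 0 = (1 xor 1); if it is clear, ctlz = 32 and 32 >> 5 = 1 =
// (0 xor 1).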
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND &&
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
DAG.getNode(ISD::AND, SDLoc(N),
TruncVT,
DAG.getNode(ISD::TRUNCATE,
SDLoc(N),
TruncVT, N100),
DAG.getConstant(TruncC, TruncVT)));
}
}
// fold operands of srl based on knowledge that the low bits are not
// demanded.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (N1C) {
SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
if (NewSRL.getNode())
return NewSRL;
}
// Attempt to convert a srl of a load into a narrower zero-extending load.
SDValue NarrowLoad = ReduceLoadWidth(N);
if (NarrowLoad.getNode())
return NarrowLoad;
// Here is a common situation. We want to optimize:
//
// %a = ...
// %b = and i32 %a, 2
// %c = srl i32 %b, 1
// brcond i32 %c ...
//
// into
//
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
// However, once the source operand of the SRL is optimized into an AND, the
// SRL itself may not be optimized further. Look for it and add the BRCOND
// into the worklist.
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
AddToWorkList(Use);
else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
// Also look past the truncate.
Use = *Use->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
AddToWorkList(Use);
}
}
return SDValue();
}
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
// fold (select C, X, X) -> X
if (N1 == N2)
return N1;
// fold (select true, X, Y) -> X
if (N0C && !N0C->isNullValue())
return N1;
// fold (select false, X, Y) -> Y
if (N0C && N0C->isNullValue())
return N2;
// fold (select C, 1, X) -> (or C, X)
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
if (VT.isInteger() &&
(VT0 == MVT::i1 ||
(VT0.isInteger() &&
TLI.getBooleanContents(false) ==
TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
N0, DAG.getConstant(1, VT0));
XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
N0, DAG.getConstant(1, VT0));
AddToWorkList(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
}
// fold (select C, 0, X) -> (and (not C), X)
if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorkList(NOTNode.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorkList(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
// fold (select C, X, 0) -> (and C, X)
if (VT == MVT::i1 && N2C && N2C->isNullValue())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or X, Y)
if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
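// Two concrete i1 instances of the select folds above (operands assumed):
//   (select c, 1, y) -> (or c, y)
//   (select c, y, 0) -> (and c, y)
// so boolean selects lower to plain logic before instruction selection.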
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
// FIXME:
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
return SDValue();
}
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT LoVT, HiVT;
llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the inputs.
SDValue Lo, Hi, LL, LH, RL, RH;
llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
return std::make_pair(Lo, Hi);
}
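// Illustrative split (types assumed): a v8i32 SETCC whose result needs
// splitting becomes two v4i32 SETCCs, one per half of each operand, both
// reusing the original condition code from operand 2.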
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
// vselect (setl[te] X, 0), -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
if (N0.getOpcode() == ISD::SETCC) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
bool isAbs = false;
bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
(ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
if (isAbs) {
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorkList(Shift.getNode());
AddToWorkList(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
}
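// Worked per-lane example of the abs canonicalization above (v4i32
// assumed): (vselect (setgt x, -1), x, (sub 0, x)) becomes
// y = (sra x, 31); (xor (add x, y), y). For a lane holding -5, y = -1,
// add = -6, xor = 5; for +5, y = 0 and the lane passes through unchanged.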
// If the VSELECT result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
// and enables future optimizations (e.g. min/max pattern matching on X86).
if (N0.getOpcode() == ISD::SETCC) {
EVT VT = N->getValueType(0);
// Check if any splitting is required.
if (TLI.getTypeAction(*DAG.getContext(), VT) !=
TargetLowering::TypeSplitVector)
return SDValue();
SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
// Add the new VSELECT nodes to the work list in case they need to be split
// again.
AddToWorkList(Lo.getNode());
AddToWorkList(Hi.getNode());
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
return SDValue();
}
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDValue N3 = N->getOperand(3);
SDValue N4 = N->getOperand(4);
ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
// fold select_cc lhs, rhs, x, x, cc -> x
if (N2 == N3)
return N2;
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, SDLoc(N), false);
if (SCC.getNode()) {
AddToWorkList(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
if (!SCCC->isNullValue())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
}
// Fold to a simpler select_cc
if (SCC.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
SCC.getOperand(0), SCC.getOperand(1), N2, N3,
SCC.getOperand(2));
}
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N2, N3))
return SDValue(N, 0); // Don't revisit N.
// fold select_cc into other things, such as min/max/abs
return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
cast<CondCodeSDNode>(N->getOperand(2))->get(),
SDLoc(N));
}
// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension is possible and the above
// mentioned transformation is profitable.
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
unsigned ExtOpc,
SmallVectorImpl<SDNode *> &ExtendNodes,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
UE = N0.getNode()->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
if (User == N)
continue;
if (UI.getUse().getResNo() != N0.getResNo())
continue;
// FIXME: Only extend SETCC N, N and SETCC N, c for now.
if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
// Sign bits will be lost after a zext.
return false;
bool Add = false;
for (unsigned i = 0; i != 2; ++i) {
SDValue UseOp = User->getOperand(i);
if (UseOp == N0)
continue;
if (!isa<ConstantSDNode>(UseOp))
return false;
Add = true;
}
if (Add)
ExtendNodes.push_back(User);
continue;
}
// If truncates aren't free and there are users we can't
// extend, it isn't worthwhile.
if (!isTruncFree)
return false;
// Remember if this value is live-out.
if (User->getOpcode() == ISD::CopyToReg)
HasCopyToRegUses = true;
}
if (HasCopyToRegUses) {
bool BothLiveOut = false;
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
BothLiveOut = true;
break;
}
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
// a good reason for the transformation.
return ExtendNodes.size();
}
return true;
}
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
SDNode *SetCC = SetCCs[i];
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
if (SOp == Trunc)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
}
Ops.push_back(SetCC->getOperand(2));
CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
&Ops[0], Ops.size()));
}
}
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (sext c1) -> c1
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
unsigned DestBits = VT.getScalarType().getSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already sign extended and the trunc/sext pair is a no-op.
if (NumSignBits > DestBits-MidBits)
return Op;
} else if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
N0.getValueType())) {
if (OpBits < DestBits)
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
// fold (sext (load x)) -> (sext (truncate (sextload x)))
// None of the supported targets knows how to perform load and sign extend
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (sext (sextload x)) -> (sext (truncate (sextload x)))
// fold (sext ( extload x)) -> (sext (truncate (sextload x)))
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), MemVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
SetCCs, TLI);
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
ExtLoad, DAG.getConstant(Mask, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
TLI.getBooleanContents(true) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
EVT N0VT = N0.getOperand(0).getValueType();
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
EVT SVT = getSetCCResultType(N0VT);
// We know that the # of elements of the result is the same as the
// # of elements of the compare (and the # of elements of the compare
// result for that matter). Check to see that they are the same size. If
// so, we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
if (SVT == MatchingVectorType) {
SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
}
}
// sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
SDValue NegOne =
DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
SDValue SCC =
SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
if (!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
return DAG.getSelect(SDLoc(N), VT,
DAG.getSetCC(SDLoc(N),
getSetCCResultType(VT),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
}
}
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
return SDValue();
}
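
// Putting the load folds above together, a typical rewrite is (conceptual
// DAG-dump notation; the tN labels are just placeholders):
//   t1: i8,ch = load p
//   t2: i32   = sign_extend t1
// becomes
//   t1': i32,ch = sextload<i8> p
// with t2 replaced by t1' and any remaining i8 users of the original load
// fed through (truncate t1'), assuming an i8 sextload is legal (or we are
// before operation legalization and the load is not volatile).
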
// isTruncateOf - If N is a truncate of some other value, return true and
// record the value being truncated in Op and which of Op's bits are zero in
// KnownZero. This function computes KnownZero to avoid a duplicated call to
// ComputeMaskedBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
APInt &KnownZero) {
APInt KnownOne;
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
return true;
}
if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
return false;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
if (COp0 && COp0->isNullValue())
Op = Op1;
else if (COp1 && COp1->isNullValue())
Op = Op0;
else
return false;
DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
return false;
return true;
}
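
// Example of the setcc form: for i1 N = (setcc X, 0, setne) where
// ComputeMaskedBits proves that every bit of X above bit 0 is zero, N is
// equivalent to (truncate X to i1), so Op is set to X and the caller can
// treat the setcc exactly like a truncate.
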
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (zext c1) -> c1
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
N0.getOperand(0));
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
// FIXME: We should extend this to work for vectors too.
SDValue Op;
APInt KnownZero;
if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
APInt TruncatedBits =
(Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
APInt(Op.getValueSizeInBits(), 0) :
APInt::getBitsSet(Op.getValueSizeInBits(),
N0.getValueSizeInBits(),
std::min(Op.getValueSizeInBits(),
VT.getSizeInBits()));
if (TruncatedBits == (KnownZero & TruncatedBits)) {
if (VT.bitsGT(Op.getValueType()))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
if (VT.bitsLT(Op.getValueType()))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
return Op;
}
}
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
AddToWorkList(Op.getNode());
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
AddToWorkList(Op.getNode());
}
return DAG.getZeroExtendInReg(Op, SDLoc(N),
N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
// if either of the casts is not free.
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
if (X.getValueType().bitsLT(VT)) {
X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
} else if (X.getValueType().bitsGT(VT)) {
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, SDLoc(N), VT,
X, DAG.getConstant(Mask, VT));
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
// None of the supported targets knows how to perform load and vector_zext
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
if (LN0->getExtensionType() != ISD::SEXTLOAD) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
SetCCs, TLI);
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
ExtLoad, DAG.getConstant(Mask, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
// fold (zext (zextload x)) -> (zext (truncate (zextload x)))
// fold (zext ( extload x)) -> (zext (truncate (zextload x)))
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), MemVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
ExtLoad),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
if (N0.getOpcode() == ISD::SETCC) {
if (!LegalOperations && VT.isVector()) {
// zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
// Only do this before legalize for now.
EVT N0VT = N0.getOperand(0).getValueType();
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
DAG.getConstant(1, EltVT));
if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # of elements of the result is the same as the
// # of elements of the compare (and the # of elements of the compare
// result for that matter). Check to see that they are the same size. If
// so, we know that the element size of the zext'd result matches the
// element size of the compare operands.
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
&OneOps[0], OneOps.size()));
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
EVT MatchingElementType =
EVT::getIntegerVT(*DAG.getContext(),
N0VT.getScalarType().getSizeInBits());
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
&OneOps[0], OneOps.size()));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDValue SCC =
SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
SDValue ShAmt = N0.getOperand(1);
unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
InnerZExt.getOperand(0).getValueType().getSizeInBits();
if (ShAmtVal > KnownZeroBits)
return SDValue();
}
SDLoc DL(N);
// Ensure that the shift amount is wide enough for the shifted value.
if (VT.getSizeInBits() >= 256)
ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
return DAG.getNode(N0.getOpcode(), DL, VT,
DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
ShAmt);
}
return SDValue();
}
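
// A concrete instance of the (zext (truncate x)) -> (and x, mask) fold:
//   t1: i8  = truncate t0:i32
//   t2: i32 = zero_extend t1
// becomes
//   t2: i32 = and t0, 255
// which drops the trunc/zext pair entirely whenever AND is legal (or we
// are before operation legalization).
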
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
if (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (truncate x))
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue TruncOp = N0.getOperand(0);
if (TruncOp.getValueType() == VT)
return TruncOp; // x iff x size == aext size.
if (TruncOp.getValueType().bitsGT(VT))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
}
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
// if the trunc is not free.
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
SDValue X = N0.getOperand(0).getOperand(0);
if (X.getValueType().bitsLT(VT)) {
X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
} else if (X.getValueType().bitsGT(VT)) {
X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, SDLoc(N), VT,
X, DAG.getConstant(Mask, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ANY_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
// fold (aext ( extload x)) -> (aext (truncate (extload x)))
if (N0.getOpcode() == ISD::LOAD &&
!ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
if (N0.getOpcode() == ISD::SETCC) {
// aext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
// We know that the # of elements of the result is the same as the
// # of elements of the compare (and the # of elements of the compare
// result for that matter). Check to see that they are the same size. If
// so, we know that the element size of the extended result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
else {
EVT MatchingElementType =
EVT::getIntegerVT(*DAG.getContext(),
N0VT.getScalarType().getSizeInBits());
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
}
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDValue SCC =
SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode())
return SCC;
}
return SDValue();
}
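
// A concrete instance of the (aext (and (trunc x), cst)) fold above
// (hypothetical types/values): with
//   t1: i8  = and (truncate t0:i64), 15
//   t2: i32 = any_extend t1
// and a non-free i64->i8 truncate, this becomes
//   t2: i32 = and (truncate t0), 15
// keeping only the cheaper i64->i32 truncate and the masking AND.
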
/// GetDemandedBits - See if the specified operand can be simplified with the
/// knowledge that only the bits specified by Mask are used. If so, return the
/// simpler operand, otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
case ISD::Constant: {
const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
assert(CV != 0 && "Const value should be ConstSDNode.");
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
return DAG.getConstant(NewVal, V.getValueType());
break;
}
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS doesn't contribute bits to the OR/XOR, drop it.
if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
return V.getOperand(1);
if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
return V.getOperand(0);
break;
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
break;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
// See if we can recursively simplify the LHS.
unsigned Amt = RHSC->getZExtValue();
// Watch out for shift count overflow though.
if (Amt >= Mask.getBitWidth()) break;
APInt NewMask = Mask << Amt;
SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
if (SimplifyLHS.getNode())
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
}
return SDValue();
}
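
// Worked example: GetDemandedBits(or (shl x, 8), y) with Mask = 0xFF sees
// that (shl x, 8) contributes no bits inside the mask (MaskedValueIsZero
// holds for it), so it returns y; a caller such as visitTRUNCATE can then
// fold "trunc (or (shl x, 8), y)" to "trunc y".
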
/// ReduceLoadWidth - If the result of a wider load is shifted right by N
/// bits and then truncated to a narrower type, where N is a multiple of the
/// number of bits of the narrower type, transform it to a narrower load
/// from address + N / (number of bits of the new type). If the result is to
/// be extended, also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT ExtVT = VT;
// This transformation isn't valid for vector loads.
if (VT.isVector())
return SDValue();
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {
// Another special-case: SRL is basically zero-extending a narrower value.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N01) return SDValue();
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!ExtVT.isRound())
return SDValue();
unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
// Is the shift amount a multiple of the size of ExtVT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of the size of ExtVT?
if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
// At this point, we must have a load or else we can't do the transform.
if (!isa<LoadSDNode>(N0)) return SDValue();
// Because an SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
// lowering of SRL and an sextload.
if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
return SDValue();
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
return SDValue();
}
}
// If the load is shifted left (and the result isn't shifted back right),
// we can fold the truncate through the shift.
unsigned ShLeftAmt = 0;
if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShLeftAmt = N01->getZExtValue();
N0 = N0.getOperand(0);
}
}
// If we haven't found a load, we can't narrow it. Don't transform one with
// multiple uses, this would require adding a new load.
if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
return SDValue();
// Don't change the width of a volatile load.
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (LN0->isVolatile())
return SDValue();
// Verify that we are actually reducing a load width here.
if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
return SDValue();
// For the transform to be legal, the load must produce only two values
// (the value loaded and the chain). Don't transform a pre-increment
// load, for example, which produces an extra value. Otherwise the
// transformation is not equivalent, and the downstream logic to replace
// uses gets things wrong.
if (LN0->getNumValues() > 2)
return SDValue();
// If the load that we're shrinking is an extload and we're not just
// discarding the extension we can't simply shrink the load. Bail.
// TODO: It would be possible to merge the extensions in some cases.
if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
return SDValue();
EVT PtrType = N0.getOperand(1).getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
// It's not possible to generate a constant of extended or untyped type.
return SDValue();
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
}
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
PtrType, LN0->getBasePtr(),
DAG.getConstant(PtrOff, PtrType));
AddToWorkList(NewPtr.getNode());
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
else
Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
NewAlign, LN0->getTBAAInfo());
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
if (ShLeftAmt != 0) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
if (ShLeftAmt >= VT.getSizeInBits())
Result = DAG.getConstant(0, VT);
else
Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
Result, DAG.getConstant(ShLeftAmt, ShImmTy));
}
// Return the new loaded value.
return Result;
}
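
// End-to-end example on a little-endian target (conceptual dump notation):
//   t1: i64,ch = load p
//   t2: i64    = srl t1, 32
//   t3: i32    = truncate t2
// ShAmt = 32 is a multiple of the 32-bit result width, so this becomes
//   t3': i32,ch = load (p + 4)
// i.e. PtrOff = 32/8 = 4; on a big-endian target ShAmt is first adjusted
// as above so the new pointer picks out the correct bytes.
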
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getScalarType().getSizeInBits();
// fold (sext_in_reg c1) -> c1
if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT)
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), N1);
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
SDValue NarrowLoad = ReduceLoadWidth(N);
if (NarrowLoad.getNode())
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
return DAG.getNode(ISD::SRA, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1));
}
}
// fold (sext_inreg (extload x)) -> (sextload x)
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), EVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorkList(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), EVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false);
if (BSwap.getNode() != 0)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
BSwap, N1);
}
return SDValue();
}
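
// Example of the SRL->SRA rewrite above: for
//   t1: i32 = srl X, 24
//   t2: i32 = sign_extend_inreg t1, i8
// the low 8 bits of t1 are X's top byte, so sign-extending them is the same
// as shifting arithmetically:
//   t2: i32 = sra X, 24
// For smaller shift amounts the rewrite is only done when X already has
// enough sign bits, per the ComputeNumSignBits check.
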
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
return N0;
// fold (truncate c1) -> c1
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
if (N0.getOperand(0).getValueType().bitsLT(VT))
// if the source is smaller than the dest, we still need an extend
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
N0.getOperand(0));
if (N0.getOperand(0).getValueType().bitsGT(VT))
// if the source is larger than the dest, then we just need the truncate
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// if the source and dest are the same type, we can drop both the extend
// and the truncate.
return N0.getOperand(0);
}
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i8 y = TRUNCATE(i64 x)
// -- becomes --
// v16i8 b = BITCAST (v2i64 val)
// i8 y = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
//
// Note: We only run this optimization after type legalization (which often
// creates this pattern) and before operation legalization after which
// we need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
LegalTypes && !LegalOperations && N0->hasOneUse()) {
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
unsigned NumElem = VecTy.getVectorNumElements();
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
EVT IndexTy = TLI.getVectorIdxTy();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
NVT, N0.getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
SDLoc(N), TrTy, V,
DAG.getConstant(Index, IndexTy));
}
}
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
// (2xi32 (buildvector x, y)).
if (Level == AfterLegalizeVectorOps && VT.isVector() &&
N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N0.getOperand(0).hasOneUse()) {
SDValue BuildVect = N0.getOperand(0);
EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
EVT TruncVecEltTy = VT.getVectorElementType();
// Check that the element types match.
if (BuildVectEltTy == TruncVecEltTy) {
// Now we only need to compute the offset of the truncated elements.
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
Opnds.size());
}
}
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
// Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
SDValue Shorter =
GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
VT.getSizeInBits()));
if (Shorter.getNode())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
SDValue Reduced = ReduceLoadWidth(N);
if (Reduced.getNode())
return Reduced;
}
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
SmallVector<EVT, 8> VTs;
SDValue V;
unsigned Idx = 0;
unsigned NumDefs = 0;
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
SDValue X = N0.getOperand(i);
if (X.getOpcode() != ISD::UNDEF) {
V = X;
Idx = i;
NumDefs++;
}
// Stop if more than one member is non-undef.
if (NumDefs > 1)
break;
VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
X.getValueType().getVectorNumElements()));
}
if (NumDefs == 0)
return DAG.getUNDEF(VT);
if (NumDefs == 1) {
assert(V.getNode() && "The single defined operand is empty!");
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
if (i != Idx) {
Opnds.push_back(DAG.getUNDEF(VTs[i]));
continue;
}
SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
AddToWorkList(NV.getNode());
Opnds.push_back(NV);
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
&Opnds[0], Opnds.size());
}
}
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
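
// Example of the concat fold near the end of visitTRUNCATE: with a single
// defined operand,
//   v8i16 = truncate (v8i32 concat_vectors undef:v4i32, X:v4i32)
// becomes
//   v8i16 = concat_vectors undef:v4i16, (v4i16 truncate X)
// so only the defined half of the vector is actually narrowed.
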
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue Elt = N->getOperand(i);
if (Elt.getOpcode() != ISD::MERGE_VALUES)
return Elt.getNode();
return Elt.getOperand(Elt.getResNo()).getNode();
}
/// CombineConsecutiveLoads - build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
LD1->getPointerInfo().getAddrSpace() !=
LD2->getPointerInfo().getAddrSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
// If both are volatile this would reduce the number of volatile loads.
// If one is volatile it might be ok, but be conservative and bail out.
!LD1->isVolatile() &&
!LD2->isVolatile() &&
DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
LD1->getBasePtr(), LD1->getPointerInfo(),
false, false, false, Align);
}
return SDValue();
}
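
// Example on a little-endian target: an i64 BUILD_PAIR of two consecutive
// i32 loads
//   lo: i32,ch = load p
//   hi: i32,ch = load (p + 4)
//   t:  i64    = build_pair lo, hi
// collapses to a single
//   t': i64,ch = load p
// provided neither load is volatile, both have one use, and the i64 load's
// ABI alignment does not exceed the alignment of the first load.
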
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
// on the bitconvert.
// First check to see if this is all constant.
if (!LegalTypes &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
VT.isVector()) {
bool isSimple = true;
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
N0.getOperand(i).getOpcode() != ISD::Constant &&
N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
isSimple = false;
break;
}
EVT DestEltVT = N->getValueType(0).getVectorElementType();
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
}
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
if (Res.getNode() != N) {
if (!LegalOperations ||
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
return Res;
// Folding it resulted in an illegal node, and it's too late to
// do that. Clean up the old node and forego the transformation.
// Ideally this won't happen very often, because instcombine
// and the earlier dagcombine runs (where illegal nodes are
// permitted) should have folded most of them already.
DAG.DeleteNode(Res.getNode());
}
}
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
// if the resulting load doesn't need a higher alignment than the original.
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
if (Align <= OrigAlign) {
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->isInvariant(), OrigAlign,
LN0->getTBAAInfo());
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, SDLoc(N0),
N0.getValueType(), Load),
Load.getValue(1));
return Load;
}
}
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
N0.getNode()->hasOneUse() && VT.isInteger() &&
!VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, SDLoc(N), VT,
NewConv, DAG.getConstant(SignBit, VT));
assert(N0.getOpcode() == ISD::FABS);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
NewConv, DAG.getConstant(~SignBit, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
IntXVT, N0.getOperand(1));
AddToWorkList(X.getNode());
// If X has a different width than the result/lhs, sext it or truncate it.
unsigned VTWidth = VT.getSizeInBits();
if (OrigXWidth < VTWidth) {
X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
AddToWorkList(X.getNode());
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
X = DAG.getNode(ISD::SRL, SDLoc(X),
X.getValueType(), X,
DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
AddToWorkList(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorkList(X.getNode());
}
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, VT));
AddToWorkList(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
Cst, DAG.getConstant(~SignBit, VT));
AddToWorkList(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
}
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
if (N0.getOpcode() == ISD::BUILD_PAIR) {
SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
if (CombineLD.getNode())
return CombineLD;
}
return SDValue();
}
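
// Example of the fneg fold: on a target where f32 negation isn't free,
//   t1: f32 = fneg x
//   t2: i32 = bitcast t1
// becomes
//   t2: i32 = xor (bitcast x), 0x80000000
// flipping only the IEEE sign bit; fabs similarly becomes an AND with
// 0x7FFFFFFF.
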
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
}
/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
unsigned SrcBitSize = SrcEltVT.getSizeInBits();
unsigned DstBitSize = DstEltVT.getSizeInBits();
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
BV->getValueType(0).getVectorNumElements());
// Due to the FP element handling below calling this routine recursively,
// we can end up with a scalar-to-vector node here.
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
DAG.getNode(ISD::BITCAST, SDLoc(BV),
DstEltVT, BV->getOperand(0)));
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
&Ops[0], Ops.size());
}
// Otherwise, we're growing or shrinking the elements. To avoid having to
// handle annoying details of growing/shrinking FP values, we convert them to
// int first.
if (SrcEltVT.isFloatingPoint()) {
// Convert the input float vector to an int vector whose elements are the
// same size.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
// Now we know the input is an integer vector. If the output is a FP type,
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
// Okay, we know the src/dst types are both integers of differing types.
// Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
if (SrcBitSize < DstBitSize) {
unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e;
i += NumInputsPerOutput) {
bool isLE = TLI.isLittleEndian();
APInt NewBits = APInt(DstBitSize, 0);
bool EltIsUndef = true;
for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
// Shift the previously computed bits over.
NewBits <<= SrcBitSize;
SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
if (Op.getOpcode() == ISD::UNDEF) continue;
EltIsUndef = false;
NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
&Ops[0], Ops.size());
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
for (unsigned j = 0; j != NumOutputsPerInput; ++j)
Ops.push_back(DAG.getUNDEF(DstEltVT));
continue;
}
APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
// Simply turn this into a SCALAR_TO_VECTOR of the new type.
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
Ops[0]);
OpVal = OpVal.lshr(DstBitSize);
}
// For big endian targets, swap the order of the pieces of each element.
if (TLI.isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
&Ops[0], Ops.size());
}
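
// Shrinking example (hypothetical constants): bitcasting the constant vector
//   v2i32 build_vector 0x11223344, 0x55667788
// to v4i16 yields, on a little-endian target,
//   v4i16 build_vector 0x3344, 0x1122, 0x7788, 0x5566
// since each i32 splits low half first; on a big-endian target the two
// pieces of each element are swapped as above.
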
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
// fold (fadd A, 0) -> A
if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, SDLoc(N), VT,
N0.getOperand(1), N1));
// No FP constant should be created after legalization, as the Instruction
// Selection pass has a hard time dealing with FP constants.
//
// We don't need to test this condition for transformations like the
// following, as the DAG being transformed implies it is legal to take an
// FP constant as operand.
//
// (fadd (fmul c, x), x) -> (fmul c+1, x)
//
bool AllowNewFpConst = (Level < AfterLegalizeDAG);
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, VT);
// In unsafe math mode, we can fold chains of FADD's of the same value
// into multiplications. This transform is not safe in general because
// we are reducing the number of rounding steps.
if (DAG.getTarget().Options.UnsafeFPMath &&
TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
!N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
// (fadd (fmul c, x), x) -> (fmul x, c+1)
if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP00, 0),
DAG.getConstantFP(1.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N1, NewCFP);
}
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP01, 0),
DAG.getConstantFP(1.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N1, NewCFP);
}
// (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(1) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP00, 0),
DAG.getConstantFP(2.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(1), NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP01, 0),
DAG.getConstantFP(2.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(0), NewCFP);
}
}
if (N1.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
// (fadd x, (fmul c, x)) -> (fmul x, c+1)
if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP10, 0),
DAG.getConstantFP(1.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0, NewCFP);
}
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP11, 0),
DAG.getConstantFP(1.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0, NewCFP);
}
// (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(1) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP10, 0),
DAG.getConstantFP(2.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N1.getOperand(1), NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
SDValue(CFP11, 0),
DAG.getConstantFP(2.0, VT));
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N1.getOperand(0), NewCFP);
}
}
if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N1, DAG.getConstantFP(3.0, VT));
}
if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0, DAG.getConstantFP(3.0, VT));
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
if (AllowNewFpConst &&
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(0),
DAG.getConstantFP(4.0, VT));
}
// FADD -> FMA combines:
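// These combines fire only when fast FP op fusion is allowed (or unsafe
// math is enabled) and the target reports that a fused multiply-add is
// faster than a separate FMUL and FADD.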
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1), N1);
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
return SDValue();
}
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
// fold (fsub A, 0) -> A
if (DAG.getTarget().Options.UnsafeFPMath &&
N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
if (DAG.getTarget().Options.UnsafeFPMath &&
N0CFP && N0CFP->getValueAPF().isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
// (fsub x, x) -> 0.0 &
// (fsub x, (fadd x, y)) -> (fneg y) &
// (fsub x, (fadd y, x)) -> (fneg y)
if (DAG.getTarget().Options.UnsafeFPMath) {
if (N0 == N1)
return DAG.getConstantFP(0.0f, VT);
if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
&DAG.getTarget().Options))
return GetNegatedExpression(N11, DAG, LegalOperations);
if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
&DAG.getTarget().Options))
return GetNegatedExpression(N10, DAG, LegalOperations);
}
}
// FSUB -> FMA combines:
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, dl, VT, N1));
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG &&
N0.getOperand(0).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
}
return SDValue();
}
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
// fold (fmul A, 0) -> 0
if (DAG.getTarget().Options.UnsafeFPMath &&
N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
if (DAG.getTarget().Options.UnsafeFPMath &&
ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
&DAG.getTarget().Options)) {
if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
&DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
GetNegatedExpression(N0, DAG, LegalOperations),
GetNegatedExpression(N1, DAG, LegalOperations));
}
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
if (DAG.getTarget().Options.UnsafeFPMath &&
N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(1), N1));
return SDValue();
}
SDValue DAGCombiner::visitFMA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (DAG.getTarget().Options.UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
return N2;
}
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FMUL &&
N0 == N2.getOperand(0) &&
N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
if (DAG.getTarget().Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FMUL && N1CFP &&
N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0),
DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
N2);
}
// (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
AddToWorkList(RHSNeg.getNode());
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
// (fma x, c, x) -> (fmul x, (c+1))
if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
N1, DAG.getConstantFP(1.0, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
N1, DAG.getConstantFP(-1.0, VT)));
return SDValue();
}
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
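// For example, (fdiv X, 4.0) becomes (fmul X, 0.25). The checks below
// reject reciprocals that are too nasty (NaN, denormal) or that are not
// legal FP immediates for the target.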
if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
// Compute the reciprocal 1.0 / c2.
APFloat N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
// Only do the transform if the reciprocal is a legal fp immediate that
// isn't too nasty (e.g. NaN, denormal, ...).
if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
(!LegalOperations ||
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
DAG.getConstantFP(Recip, VT));
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
&DAG.getTarget().Options)) {
if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
&DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
GetNegatedExpression(N0, DAG, LegalOperations),
GetNegatedExpression(N1, DAG, LegalOperations));
}
}
return SDValue();
}
SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
if (N1CFP) {
const APFloat& V = N1CFP->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
} else {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
}
}
// copysign(fabs(x), y) -> copysign(x, y)
// copysign(fneg(x), y) -> copysign(x, y)
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0, N1.getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
// but UINT_TO_FP is legal on this target, try to convert.
if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
// (See also visitSELECT)
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
// fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
N0.getOperand(2) };
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
// (select_cc x, y, 1.0, 0.0, cc)
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
N0.getOperand(0).getOperand(2) };
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
}
}
return SDValue();
}
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
// but SINT_TO_FP is legal on this target, try to convert.
if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
// (See also visitSELECT)
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
N0.getOperand(2) };
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
}
}
return SDValue();
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_sint c1fp) -> c1
if (N0CFP)
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
if (N0CFP)
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
if (N0CFP)
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
// fold (fp_round (fp_extend x)) -> x
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
return N0.getOperand(0);
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
// This is a value-preserving truncation if both rounds are.
bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
N0.getNode()->getConstantOperandVal(1) == 1;
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
DAG.getIntPtrConstant(IsTrunc));
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorkList(Tmp.getNode());
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
Tmp, N0.getOperand(1));
}
return SDValue();
}
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
// fold (fp_round_inreg c1fp) -> c1fp
if (N0CFP && isTypeLegal(EVT)) {
SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
}
return SDValue();
}
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
if (N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::FP_ROUND)
return SDValue();
// fold (fp_extend c1fp) -> c1fp
if (N0CFP)
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// Turn fp_extend(fp_round(X, 1)) -> X, since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
&& N0.getNode()->getConstantOperandVal(1) == 1) {
SDValue In = N0.getOperand(0);
if (In.getValueType() == VT) return In;
if (VT.bitsLT(In.getValueType()))
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
In, N0.getOperand(1));
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
}
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
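// The extending load yields the wider value directly; other users of the
// original narrow load are redirected to an fp_round of the extended value.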
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
return SDValue();
}
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVUnaryOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
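// For example, for f32 this emits (xor x, 0x80000000), flipping only the
// sign bit of the integer representation.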
if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N),
VT, Int);
}
}
// (fneg (fmul c, x)) -> (fmul -c, x)
if (N0.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
if (CFP1)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(0),
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
N0.getOperand(1)));
}
return SDValue();
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
if (N0CFP)
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
if (N0CFP)
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
if (N0CFP)
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVUnaryOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (fabs c1) -> fabs(c1)
if (N0CFP)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
return N->getOperand(0);
// fold (fabs (fneg x)) -> (fabs x)
// fold (fabs (fcopysign x, y)) -> (fabs x)
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
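// For example, for f32 this emits (and x, 0x7fffffff), clearing only the
// sign bit of the integer representation.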
if (!TLI.isFAbsFree(VT) &&
N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Int);
}
}
return SDValue();
}
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
// If we did this folding here, it would be necessary to update the
// MachineBasicBlock CFG, which is awkward.
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
TLI.isOperationLegalOrCustom(ISD::BR_CC,
N1.getOperand(0).getValueType())) {
return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
Chain, N1.getOperand(2),
N1.getOperand(0), N1.getOperand(1), N2);
}
if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
(N1.getOperand(0).hasOneUse() &&
N1.getOperand(0).getOpcode() == ISD::SRL))) {
SDNode *Trunc = 0;
if (N1.getOpcode() == ISD::TRUNCATE) {
// Look past the truncate.
Trunc = N1.getNode();
N1 = N1.getOperand(0);
}
// Match this pattern so that we can generate simpler code:
//
// %a = ...
// %b = and i32 %a, 2
// %c = srl i32 %b, 1
// brcond i32 %c ...
//
// into
//
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
SDValue Op0 = N1.getOperand(0);
SDValue Op1 = N1.getOperand(1);
if (Op0.getOpcode() == ISD::AND &&
Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
SDValue SetCC =
DAG.getSetCC(SDLoc(N),
getSetCCResultType(Op0.getValueType()),
Op0, DAG.getConstant(0, Op0.getValueType()),
ISD::SETNE);
SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, SetCC, N2);
// Don't add the new BRCond into the worklist or else SimplifySelectCC
// will convert it back to (X & C1) >> C2.
CombineTo(N, NewBRCond, false);
// Truncate is dead.
if (Trunc) {
removeFromWorkList(Trunc);
DAG.DeleteNode(Trunc);
}
// Replace the uses of SRL with SETCC
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
if (Trunc)
// Restore N1 if the above transformation doesn't match.
N1 = N->getOperand(1);
}
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
SDNode *TheXor = N1.getNode();
SDValue Op0 = TheXor->getOperand(0);
SDValue Op1 = TheXor->getOperand(1);
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
SDValue Tmp = visitXOR(TheXor);
if (Tmp.getNode()) {
if (Tmp.getNode() != TheXor) {
DEBUG(dbgs() << "\nReplacing.8 ";
TheXor->dump(&DAG);
dbgs() << "\nWith: ";
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
removeFromWorkList(TheXor);
DAG.DeleteNode(TheXor);
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, Tmp, N2);
}
// visitXOR has changed XOR's operands or replaced the XOR completely,
// bail out.
return SDValue(N, 0);
}
}
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
Op0.getOpcode() == ISD::XOR) {
TheXor = Op0.getNode();
Equal = true;
}
EVT SetCCVT = N1.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
SetCCVT,
Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, SetCC, N2);
}
}
return SDValue();
}
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
// If we did this folding here, it would be necessary to update the
// MachineBasicBlock CFG, which is awkward.
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), SDLoc(N),
false);
if (Simp.getNode()) AddToWorkList(Simp.getNode());
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
N->getOperand(0), Simp.getOperand(2),
Simp.getOperand(0), Simp.getOperand(1),
N->getOperand(4));
return SDValue();
}
/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
/// uses N as its base pointer and that N may be folded in the load / store
/// addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = Use->getValueType(0);
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getValue().getValueType();
} else
return false;
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
AM.BaseOffs = Offset->getSExtValue();
else
// [reg +/- reg]
AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
AM.BaseOffs = -Offset->getSExtValue();
else
// [reg +/- reg]
AM.Scale = 1;
} else
return false;
return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
}
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
/// transformation, the new indexed load / store has effectively folded
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
SDValue Ptr;
EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
VT = LD->getMemoryVT();
if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
!TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
return false;
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
VT = ST->getMemoryVT();
if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
!TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
return false;
Ptr = ST->getBasePtr();
isLoad = false;
} else {
return false;
}
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
Ptr.getNode()->hasOneUse())
return false;
// Ask the target to do addressing mode selection.
SDValue BasePtr;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
return false;
// Backends without true r+i pre-indexed forms may need to pass a
// constant base with a variable offset so that constant coercion
// will work with the patterns in canonical form.
bool Swapped = false;
if (isa<ConstantSDNode>(BasePtr)) {
std::swap(BasePtr, Offset);
Swapped = true;
}
// Don't create an indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
return false;
// Try turning it into a pre-indexed load / store except when:
// 1) The new base ptr is a frame index.
// 2) If N is a store and the new base ptr is either the same as or is a
// predecessor of the value being stored.
// 3) Another use of old base ptr is a predecessor of N. If ptr is folded
// that would create a cycle.
// 4) All uses are load / store ops that use it as old base ptr.
// Check #1. Preinc'ing a frame index would require copying the stack pointer
// (plus the implicit offset) to a register to preinc anyway.
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
return false;
// Check #2.
if (!isLoad) {
SDValue Val = cast<StoreSDNode>(N)->getValue();
if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
return false;
}
// If the offset is a constant, there may be other adds of constants that
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
E = BasePtr.getNode()->use_end(); I != E; ++I) {
SDNode *Use = *I;
if (Use == Ptr.getNode())
continue;
if (Use->isPredecessorOf(N))
continue;
if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
OtherUses.clear();
break;
}
SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
if (Op1.getNode() == BasePtr.getNode())
std::swap(Op0, Op1);
assert(Op0.getNode() == BasePtr.getNode() &&
"Use of ADD/SUB but not an operand");
if (!isa<ConstantSDNode>(Op1)) {
OtherUses.clear();
break;
}
// FIXME: In some cases, we can be smarter about this.
if (Op1.getValueType() != Offset.getValueType()) {
OtherUses.clear();
break;
}
OtherUses.push_back(Use);
}
if (Swapped)
std::swap(BasePtr, Offset);
// Now check for #3 and #4.
bool RealUse = false;
// Caches for hasPredecessorHelper
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
E = Ptr.getNode()->use_end(); I != E; ++I) {
SDNode *Use = *I;
if (Use == N)
continue;
if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
// If Ptr may be folded in the addressing mode of another use, then it's
// not profitable to do this transformation.
if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
if (!RealUse)
return false;
SDValue Result;
if (isLoad)
Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM);
else
Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM);
++PreIndexedNodes;
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.4 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
if (Swapped)
std::swap(BasePtr, Offset);
// Replace other uses of BasePtr that can be updated to use Ptr
for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
unsigned OffsetIdx = 1;
if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
OffsetIdx = 0;
assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
BasePtr.getNode() && "Expected BasePtr operand");
// We need to replace ptr0 in the following expression:
// x0 * offset0 + y0 * ptr0 = t0
// knowing that
// x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
//
// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
// indexed load/store and the expression that needs to be rewritten.
//
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
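//
// A worked instance: if the other use is t0 = (sub ptr0, 4), i.e. x0 = -1
// and y0 = 1, and the new access is pre-incremented by 8, i.e.
// t1 = ptr0 + 8 with x1 = y1 = 1, the formula gives
// t0 = (-4 - 8) + t1 = t1 - 12, which indeed equals ptr0 - 4.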
ConstantSDNode *CN =
cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
int X0, X1, Y0, Y1;
APInt Offset0 = CN->getAPIntValue();
APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
APInt CNV = Offset0;
if (X0 < 0) CNV = -CNV;
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
// We can now generate the new expression.
SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
SDLoc(OtherUses[i]),
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
removeFromWorkList(OtherUses[i]);
DAG.DeleteNode(OtherUses[i]);
}
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
DAG.DeleteNode(Ptr.getNode());
return true;
}
/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation effectively folds the add / subtract into the new
/// indexed load / store, and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
SDValue Ptr;
EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
VT = LD->getMemoryVT();
if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
!TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
return false;
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
VT = ST->getMemoryVT();
if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
!TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
return false;
Ptr = ST->getBasePtr();
isLoad = false;
} else {
return false;
}
if (Ptr.getNode()->hasOneUse())
return false;
for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
E = Ptr.getNode()->use_end(); I != E; ++I) {
SDNode *Op = *I;
if (Op == N ||
(Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
continue;
SDValue BasePtr;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
// Don't create an indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
continue;
// Try turning it into a post-indexed load / store except when
// 1) All uses are load / store ops that use it as base ptr (and
// it may be folded into the addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
continue;
// Check for #1.
bool TryNext = false;
for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
SDNode *Use = *II;
if (Use == Ptr.getNode())
continue;
// If all the uses are load / store addresses, then don't do the
// transformation.
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
bool RealUse = false;
for (SDNode::use_iterator III = Use->use_begin(),
EEE = Use->use_end(); III != EEE; ++III) {
SDNode *UseUse = *III;
if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
if (!RealUse) {
TryNext = true;
break;
}
}
}
if (TryNext)
continue;
// Check for #2
if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
SDValue Result = isLoad
? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM)
: DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.5 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Op);
DAG.DeleteNode(Op);
return true;
}
}
}
return false;
}
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
if (!LD->isVolatile()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
DEBUG(dbgs() << "\nReplacing.6 ";
N->dump(&DAG);
dbgs() << "\nWith chain: ";
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
if (N->use_empty()) {
removeFromWorkList(N);
DAG.DeleteNode(N);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
DAG.getUNDEF(N->getValueType(1)));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
// If this load is directly stored, replace the load value with the stored
// value.
// TODO: Handle store large -> read small portion.
// TODO: Handle TRUNCSTORE/LOADEXT
if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
if (ISD::isNON_TRUNCStore(Chain.getNode())) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
PrevST->getValue().getValueType() == N->getValueType(0))
return CombineTo(N, Chain.getOperand(1), Chain);
}
}
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getMemOperand()->getBaseAlignment()) {
SDValue NewLoad =
DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align,
LD->getTBAAInfo());
return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
SDValue ReplLoad;
// Replace the chain to avoid the dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
BetterChain, Ptr, LD->getMemOperand());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
LD->getValueType(0),
BetterChain, Ptr, LD->getMemoryVT(),
LD->getMemOperand());
}
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplLoad.getValue(1));
// Make sure the new and old chains are cleaned up.
AddToWorkList(Token.getNode());
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
}
}
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
// Try to slice up N to more direct loads if the slices are mapped to
// different register banks or pairing can take place.
if (SliceUpLoad(N))
return SDValue(N, 0);
return SDValue();
}
namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
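///
/// For example, assuming a little-endian target, the sequence
///   Origin = load i32, Base
///   Shift  = srl i32 Origin, 16
///   Inst   = trunc Shift to i8
/// uses only byte 2 of the original load, so it can be rewritten as a
/// one-byte load from Base + 2 (and no zext is needed, since SliceTy == Ty2).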
struct LoadedSlice {
/// \brief Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size?
bool ForCodeSize;
/// Various costs.
unsigned Loads;
unsigned Truncates;
unsigned CrossRegisterBanksCopies;
unsigned ZExts;
unsigned Shift;
Cost(bool ForCodeSize = false)
: ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
/// \brief Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize = false)
: ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
if (TruncType != LoadedType &&
!LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
ZExts = 1;
}
/// \brief Account for slicing gain in the current cost.
/// Slicing provides a few gains, like removing a shift or a
/// truncate. This method allows growing the cost of the original
/// load with the gain from this slice.
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
LS.Inst->getOperand(0).getValueType()))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
++Shift;
// If this slice can merge a cross register bank copy, account for it.
if (LS.canMergeExpensiveCrossRegisterBankCopy())
++CrossRegisterBanksCopies;
}
Cost &operator+=(const Cost &RHS) {
Loads += RHS.Loads;
Truncates += RHS.Truncates;
CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
ZExts += RHS.ZExts;
Shift += RHS.Shift;
return *this;
}
bool operator==(const Cost &RHS) const {
return Loads == RHS.Loads && Truncates == RHS.Truncates &&
CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
ZExts == RHS.ZExts && Shift == RHS.Shift;
}
bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
bool operator<(const Cost &RHS) const {
// Assume cross register banks copies are as expensive as loads.
// FIXME: Do we want some more target hooks?
unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
// Unless we are optimizing for code size, consider the
// expensive operation first.
if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
return ExpensiveOpsLHS < ExpensiveOpsRHS;
return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
(RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
}
bool operator>(const Cost &RHS) const { return RHS < *this; }
bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
};
// The last instruction that represents the slice. This should be a
// truncate instruction.
SDNode *Inst;
// The original load instruction.
LoadSDNode *Origin;
// The right shift amount in bits from the original load.
unsigned Shift;
// The DAG from which Origin comes.
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
unsigned Shift = 0, SelectionDAG *DAG = NULL)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
LoadedSlice(const LoadedSlice &LS)
: Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
/// \brief Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth bits wide and has used bits set to 1 and
/// unused bits set to 0.
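/// For example, with BitWidth = 32, an i8 slice shifted right by 16 yields
/// 0x00FF0000.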
APInt getUsedBits() const {
// Reproduce the trunc(lshr) sequence:
// - Start from the truncated value.
// - Zero extend to the desired bit width.
// - Shift left.
assert(Origin && "No original load to compare against.");
unsigned BitWidth = Origin->getValueSizeInBits(0);
assert(Inst && "This slice is not bound to an instruction");
assert(Inst->getValueSizeInBits(0) <= BitWidth &&
"Extracted slice is bigger than the whole type!");
APInt UsedBits(Inst->getValueSizeInBits(0), 0);
UsedBits.setAllBits();
UsedBits = UsedBits.zext(BitWidth);
UsedBits <<= Shift;
return UsedBits;
}
/// \brief Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
unsigned SliceSize = getUsedBits().countPopulation();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
/// \brief Get the type that will be loaded for this slice.
/// Note: This may not be the final type for the slice.
EVT getLoadedType() const {
assert(DAG && "Missing context");
LLVMContext &Ctxt = *DAG->getContext();
return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
}
/// \brief Get the alignment of the load used for this slice.
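/// E.g., an 8-byte-aligned origin sliced at byte offset 2 is only
/// 2-byte aligned (MinAlign(8, 10) == 2).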
unsigned getAlignment() const {
unsigned Alignment = Origin->getAlignment();
unsigned Offset = getOffsetFromBase();
if (Offset != 0)
Alignment = MinAlign(Alignment, Alignment + Offset);
return Alignment;
}
/// \brief Check if this slice can be rewritten with legal operations.
bool isLegal() const {
// An invalid slice is not legal.
if (!Origin || !Inst || !DAG)
return false;
// Offsets are for indexed loads only; we do not handle that.
if (Origin->getOffset().getOpcode() != ISD::UNDEF)
return false;
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
// Check that the type is legal.
EVT SliceType = getLoadedType();
if (!TLI.isTypeLegal(SliceType))
return false;
// Check that the load is legal for this type.
if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
return false;
// Check that the offset can be computed.
// 1. Check its type.
EVT PtrType = Origin->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
// 2. Check that it fits in the immediate.
if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
return false;
// 3. Check that the computation is legal.
if (!TLI.isOperationLegal(ISD::ADD, PtrType))
return false;
// Check that the zext is legal if it needs one.
EVT TruncateType = Inst->getValueType(0);
if (TruncateType != SliceType &&
!TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
return false;
return true;
}
/// \brief Get the offset in bytes of this slice in the original chunk of
/// bits.
/// \pre DAG != NULL.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
bool IsBigEndian =
DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
uint64_t Offset = Shift / 8;
unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
"The size of the original loaded type is not a multiple of a"
" byte.");
// If Offset is bigger than TySizeInBytes, it means we are loading all
// zeros. This should have been optimized away earlier in the process.
assert(TySizeInBytes > Offset &&
"Invalid shift amount for given loaded size");
if (IsBigEndian)
Offset = TySizeInBytes - Offset - getLoadedSize();
return Offset;
}
/// \brief Generate the sequence of instructions to load the slice
/// represented by this object and redirect the uses of this slice to
/// this new sequence of instructions.
/// \pre this->Inst && this->Origin are valid Instructions and this
/// object passed the legal check: LoadedSlice::isLegal returned true.
/// \return The last instruction of the sequence used to load the slice.
SDValue loadSlice() const {
assert(Inst && Origin && "Unable to replace a non-existing slice.");
const SDValue &OldBaseAddr = Origin->getBasePtr();
SDValue BaseAddr = OldBaseAddr;
// Get the offset in that chunk of bytes w.r.t. the endianness.
int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
assert(Offset >= 0 && "Offset too big to fit in int64_t!");
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
DAG->getConstant(Offset, ArithType));
}
// Create the type of the loaded slice according to its size.
EVT SliceType = getLoadedType();
// Create the load for the slice.
SDValue LastInst = DAG->getLoad(
SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zero. Create a zero extend for that.
EVT FinalType = Inst->getValueType(0);
if (SliceType != FinalType)
LastInst =
DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
return LastInst;
}
/// \brief Check if this slice can be merged with an expensive cross register
/// bank copy. E.g.,
/// i = load i32
/// f = bitcast i32 i to float
bool canMergeExpensiveCrossRegisterBankCopy() const {
if (!Inst || !Inst->hasOneUse())
return false;
SDNode *Use = *Inst->use_begin();
if (Use->getOpcode() != ISD::BITCAST)
return false;
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
const TargetRegisterClass *ArgRC =
TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
// At this point, we know that we perform a cross-register-bank copy.
// Check if it is expensive.
const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
// Assume bitcasts are cheap, unless the two register classes do not
// explicitly share a common subclass.
if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
return false;
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
ResVT.getTypeForEVT(*DAG->getContext()));
if (RequiredAlignment > getAlignment())
return false;
// 2. Check that the load is a legal operation for that type.
if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
// 3. Check that we do not have a zext in the way.
if (Inst->getValueType(0) != getLoadedType())
return false;
return true;
}
};
}
/// \brief Sorts LoadedSlice objects according to their offset.
struct LoadedSliceSorter {
bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
}
};
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
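/// For example, 0x00FF0000 is dense, while 0x00FF00FF is not.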
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
if (UsedBits.isAllOnesValue())
return true;
// Get rid of the unused bits on the right.
APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
// Get rid of the unused bits on the left.
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
return NarrowedUsedBits.isAllOnesValue();
}
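// For illustration (hypothetical bit patterns, editor's example): 0b00111100
// is dense, since shifting out the two trailing zeros and truncating the
// leading zeros leaves 0b1111, i.e. all ones; 0b00100100 is not, since the
// same normalization leaves 0b1001.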
/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
const LoadedSlice &Second) {
assert(First.Origin == Second.Origin && First.Origin &&
"Unable to match different memory origins.");
APInt UsedBits = First.getUsedBits();
assert((UsedBits & Second.getUsedBits()) == 0 &&
"Slices are not supposed to overlap.");
UsedBits |= Second.getUsedBits();
return areUsedBitsDense(UsedBits);
}
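// For illustration (editor's example): two hypothetical slices of the same
// i32 load covering bits [0, 16) and [16, 32) are next to each other (their
// union is dense), whereas slices covering [0, 8) and [24, 32) are not,
// because bits 8..23 form a hole.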
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
LoadedSlice::Cost &GlobalLSCost) {
unsigned NumberOfSlices = LoadedSlices.size();
// If there are fewer than 2 elements, no pairing is possible.
if (NumberOfSlices < 2)
return;
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. second) potential candidate
// to be placed in a paired load.
const LoadedSlice *First = NULL;
const LoadedSlice *Second = NULL;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
Second = &LoadedSlices[CurrSlice];
// If First is NULL, it means we start a new pair.
// Get to the next slice.
if (!First)
continue;
EVT LoadedType = First->getLoadedType();
// If the types of the slices are different, we cannot pair them.
if (LoadedType != Second->getLoadedType())
continue;
// Check if the target supplies paired loads for this type.
unsigned RequiredAlignment = 0;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// Move to the next pair; this type is hopeless.
Second = NULL;
continue;
}
// Check if we meet the alignment requirement.
if (RequiredAlignment > First->getAlignment())
continue;
// Check that both loads are next to each other in memory.
if (!areSlicesNextToEachOther(*First, *Second))
continue;
assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
--GlobalLSCost.Loads;
// Move to the next pair.
Second = NULL;
}
}
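// For illustration (editor's example): on a hypothetical target whose
// hasPairedLoad() accepts two adjacent same-typed slices (an ldp-style
// instruction), two i16 slices covering consecutive halves of an i32 load
// count as a single load, so GlobalLSCost.Loads drops from 2 to 1.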
/// \brief Check the profitability of all involved LoadedSlices.
/// Currently, slicing is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load's (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model is mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
const APInt &UsedBits, bool ForCodeSize) {
unsigned NumberOfSlices = LoadedSlices.size();
if (StressLoadSlicing)
return NumberOfSlices > 1;
// Check (1).
if (NumberOfSlices != 2)
return false;
// Check (2).
if (!areUsedBitsDense(UsedBits))
return false;
// Check (3).
LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
// The original code has one big load.
OrigCost.Loads = 1;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
const LoadedSlice &LS = LoadedSlices[CurrSlice];
// Accumulate the cost of all the slices.
LoadedSlice::Cost SliceCost(LS, ForCodeSize);
GlobalSlicingCost += SliceCost;
// Account as cost in the original configuration the gain obtained
// with the current slices.
OrigCost.addSliceGain(LS);
}
// If the target supports paired load, adjust the cost accordingly.
adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
return OrigCost > GlobalSlicingCost;
}
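// A worked example (editor's sketch, hypothetical operands, little-endian):
//   a  = load i32 %p
//   lo = trunc a to i16
//   hi = trunc (srl a, 16) to i16
// OrigCost is 1 load plus the gains accounted for the srl/truncates, while
// GlobalSlicingCost is 2 loads (1 after pairing, when available), so slicing
// tends to win whenever the shift and truncates are not free.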
/// \brief If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
// Keep track of already used bits to detect overlapping values.
// In that case, we will just abort the transformation.
APInt UsedBits(LD->getValueSizeInBits(0), 0);
SmallVector<LoadedSlice, 4> LoadedSlices;
// Check if this load is used as several smaller chunks of bits.
// Basically, look for uses in trunc or trunc(lshr) and record a new chain
// of computation for each trunc.
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
UI != UIEnd; ++UI) {
// Skip the uses of the chain.
if (UI.getUse().getResNo() != 0)
continue;
SDNode *User = *UI;
unsigned Shift = 0;
// Check if this is a trunc(lshr).
if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
isa<ConstantSDNode>(User->getOperand(1))) {
Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
User = *User->use_begin();
}
// At this point, User is a TRUNCATE iff we encountered trunc or
// trunc(lshr).
if (User->getOpcode() != ISD::TRUNCATE)
return false;
// The width of the type must be a power of 2 and at least 8 bits.
// Otherwise the load cannot be represented in LLVM IR.
// Moreover, if the shift amount is not a multiple of 8 bits, the slice
// will straddle byte boundaries. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
return false;
// Build the slice for this chain of computations.
LoadedSlice LS(User, LD, Shift, &DAG);
APInt CurrentUsedBits = LS.getUsedBits();
// Check if this slice overlaps with another.
if ((CurrentUsedBits & UsedBits) != 0)
return false;
// Update the bits used globally.
UsedBits |= CurrentUsedBits;
// Check if the new slice would be legal.
if (!LS.isLegal())
return false;
// Record the slice.
LoadedSlices.push_back(LS);
}
// Abort slicing if it does not seem to be profitable.
if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
return false;
++SlicedLoads;
// Rewrite each chain to use an independent load.
// By construction, each chain can be represented by a unique load.
// Prepare the argument for the new token factor for all the slices.
SmallVector<SDValue, 8> ArgChains;
for (SmallVectorImpl<LoadedSlice>::const_iterator
LSIt = LoadedSlices.begin(),
LSItEnd = LoadedSlices.end();
LSIt != LSItEnd; ++LSIt) {
SDValue SliceInst = LSIt->loadSlice();
CombineTo(LSIt->Inst, SliceInst, true);
if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
"It takes more than a zext to get to the loaded slice!!");
ArgChains.push_back(SliceInst.getValue(1));
}
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
&ArgChains[0], ArgChains.size());
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
return true;
}
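// For illustration, the overall effect of SliceUpLoad() on a hypothetical
// SROA-produced pattern (editor's sketch, little-endian):
//   a = load i32 %p
//   x = trunc a to i16
//   y = trunc (srl a, 16) to i16
// becomes
//   x = load i16 %p
//   y = load i16 (%p + 2)
// with the two load chains joined by a TokenFactor that replaces the chain
// result of the original load.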
/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out. If so, return the byte size
/// being masked out and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
// The store should be chained directly to the load or be an operand of a
// tokenfactor.
if (LD == Chain.getNode())
; // ok.
else if (Chain->getOpcode() != ISD::TokenFactor)
return Result; // Fail.
else {
bool isOk = false;
for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
if (Chain->getOperand(i).getNode() == LD) {
isOk = true;
break;
}
if (!isOk) return Result;
}
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
V.getValueType() != MVT::i64)
return Result;
// Check the constant mask. Invert it so that the bits being masked out are
// 0 and the bits being kept are 1. Use getSExtValue so that leading bits
// follow the sign bit for uniformity.
uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
unsigned NotMaskLZ = countLeadingZeros(NotMask);
if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
unsigned NotMaskTZ = countTrailingZeros(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a contiguous run of bits. If so, we have 0*1+0*
if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
case 1:
case 2:
case 4: break;
default: return Result; // All-one mask, or an unsupported width (e.g., 3 bytes).
}
// Verify that the masked region starts at a byte offset that is a multiple
// of its width, so that the access is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
}
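// For illustration (editor's example, hypothetical operands): with
//   V = and (load i32 %p), 0xFFFF0000
// NotMask is 0x000000000000FFFF after the sign extension, so the function
// returns (MaskedBytes, ByteShift) = (2, 0): the low two bytes of the load
// are being cleared out.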
/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
/// provides a value as specified by MaskInfo. If so, replace the specified
/// store with a narrower store of truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
DAGCombiner *DC) {
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a valid replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
return 0;
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift)
IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
DAG.getConstant(ByteShift*8,
DC->getShiftAmountTy(IVal.getValueType())));
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
unsigned NewAlign = St->getAlignment();
if (DAG.getTargetLoweringInfo().isLittleEndian())
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
SDValue Ptr = St->getBasePtr();
if (StOffset) {
Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
++OpsNarrowed;
return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
St->getPointerInfo().getWithOffset(StOffset),
false, false, NewAlign).getNode();
}
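// For illustration (editor's example, hypothetical operands, little-endian):
//   store (or (and (load i32 %p), 0xFFFFFF00), (and %y, 0xFF)), %p
// CheckForMaskedLoad reports (1, 0), and since IVal = (and %y, 0xFF) is known
// zero outside the masked-in byte, the whole sequence shrinks to,
// schematically:
//   store i8 (trunc %y), %p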
/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
/// of the loaded bits, try narrowing the load and store if it would end up
/// being a win for performance or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
if (ST->isVolatile())
return SDValue();
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
EVT VT = Value.getValueType();
if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
return SDValue();
unsigned Opc = Value.getOpcode();
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
// load + replace + store sequence with a single (narrower) store, which makes
// the load dead.
if (Opc == ISD::OR) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return SDValue(NewST, 0);
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(0), ST,this))
return SDValue(NewST, 0);
}
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N0 = Value.getOperand(0);
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
if (LD->getBasePtr() != Ptr ||
LD->getPointerInfo().getAddrSpace() !=
ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to which to narrow the load / op / store.
SDValue N1 = Value.getOperand(1);
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
Imm ^= APInt::getAllOnesValue(BitWidth);
if (Imm == 0 || Imm.isAllOnesValue())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
while (NewBW < BitWidth &&
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
if (NewBW >= BitWidth)
return SDValue();
// If the changed lsb does not start at a boundary of the new type's
// bitwidth, start at the previous boundary.
if (ShAmt % NewBW)
ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
std::min(BitWidth, ShAmt + NewBW));
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
NewImm ^= APInt::getAllOnesValue(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
Ptr.getValueType(), Ptr,
DAG.getConstant(PtrOff, Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), NewAlign,
LD->getTBAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
AddToWorkList(NewLD.getNode());
AddToWorkList(NewVal.getNode());
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
}
return SDValue();
}
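// For illustration (editor's example, hypothetical operands, little-endian):
//   x = load i32 %p
//   store (xor x, 0x00FF0000), %p
// only touches byte 2, so it can be narrowed to
//   t = load i8 (%p + 2)
//   store (xor t, 0xFF), (%p + 2)
// assuming i8 xor is legal and the target reports the narrowing profitable.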
/// TransformFPLoadStorePair - For a given floating point load / store pair,
/// if the load value isn't used by any other operations, then consider
/// transforming the pair to integer load / store operations if the target
/// deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
Value.hasOneUse() &&
Chain == SDValue(Value.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
if (!VT.isFloatingPoint() ||
VT != ST->getMemoryVT() ||
LD->isNonTemporal() ||
ST->isNonTemporal() ||
LD->getPointerInfo().getAddrSpace() != 0 ||
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
return SDValue();
unsigned LDAlign = LD->getAlignment();
unsigned STAlign = ST->getAlignment();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
false, false, false, LDAlign);
SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
NewLD, ST->getBasePtr(),
ST->getPointerInfo(),
false, false, STAlign);
AddToWorkList(NewLD.getNode());
AddToWorkList(NewST.getNode());
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
return SDValue();
}
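// For illustration (editor's example, hypothetical operands): a copy through
// a floating-point register
//   f = load f32 %src
//   store f, %dst
// becomes, when the target deems it desirable and i32 load/store are legal,
//   i = load i32 %src
//   store i, %dst
// avoiding a round trip through the floating-point register bank.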
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
/// (load (i64 add (i64 copyfromreg %c)
/// (i64 signextend (add (i8 load %index)
/// (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
/// (i32 1)))))
struct BaseIndexOffset {
SDValue Base;
SDValue Index;
int64_t Offset;
bool IsIndexSignExt;
BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
bool IsIndexSignExt) :
Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
bool equalBaseIndex(const BaseIndexOffset &Other) {
return Other.Base == Base && Other.Index == Index &&
Other.IsIndexSignExt == IsIndexSignExt;
}
/// Parses tree in Ptr for base, index, offset addresses.
static BaseIndexOffset match(SDValue Ptr) {
bool IsIndexSignExt = false;
// We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
// instruction, then it could be just the BASE or everything else we don't
// know how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
// We know that we have at least an ADD instruction. Try to pattern match
// the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
}
// Inside a loop the current BASE pointer is calculated using an ADD and a
// MUL instruction. In this case Ptr is the actual BASE pointer.
// (i64 add (i64 %array_ptr)
// (i64 mul (i64 %induction_var)
// (i64 %element_size)))
if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);
SDValue IndexOffset = Ptr->getOperand(1);
// Skip signextends.
if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
IndexOffset = IndexOffset->getOperand(0);
IsIndexSignExt = true;
}
// Either the case of Base + Index (no offset) or something else.
if (IndexOffset->getOpcode() != ISD::ADD)
return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
// Now we have the case of Base + Index + offset.
SDValue Index = IndexOffset->getOperand(0);
SDValue Offset = IndexOffset->getOperand(1);
if (!isa<ConstantSDNode>(Offset))
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
// Ignore signextends.
if (Index->getOpcode() == ISD::SIGN_EXTEND) {
Index = Index->getOperand(0);
IsIndexSignExt = true;
} else IsIndexSignExt = false;
int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
}
};
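// For illustration (editor's example, hypothetical operands): matching
//   (add %base, (add (sext %i), 16))
// yields Base = %base, Index = %i, Offset = 16, IsIndexSignExt = true,
// whereas a plain (add %base, 16) yields Base = %base, no Index, Offset = 16.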
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
// The sequence number of this mem node; the lowest mem operand
// in the DAG starts at zero.
unsigned SequenceNum;
};
/// Sorts store nodes in a chain according to their offset from a shared
/// base ptr.
struct ConsecutiveMemoryChainSorter {
bool operator()(MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ return
+ LHS.OffsetFromBase < RHS.OffsetFromBase ||
+ (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+ LHS.SequenceNum > RHS.SequenceNum);
}
};
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
// are not constants or loads.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
!IsLoadSrc)
return false;
// Only look at ends of store sequences.
SDValue Chain = SDValue(St, 1);
if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
return false;
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
return false;
// Do not handle stores to undef base pointers.
if (BasePtr.Base.getOpcode() == ISD::UNDEF)
return false;
// Save the LoadSDNodes that we find in the chain.
// We need to make sure that these nodes do not interfere with
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
// Save the StoreSDNodes that we find in the chain.
SmallVector<MemOpLink, 8> StoreNodes;
// Walk up the chain and look for nodes with offsets from the same
// base pointer. Stop when reaching an instruction of a different kind
// or one that has a different base pointer.
unsigned Seq = 0;
StoreSDNode *Index = St;
while (Index) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (Index != St && !SDValue(Index, 1)->hasOneUse())
break;
// Find the base pointer and offset for this memory node.
BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
// Check that the base pointer is the same as the original one.
if (!Ptr.equalBaseIndex(BasePtr))
break;
// Check that the alignment is the same.
if (Index->getAlignment() != St->getAlignment())
break;
// The memory operands must not be volatile.
if (Index->isVolatile() || Index->isIndexed())
break;
// No truncation.
if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
if (St->isTruncatingStore())
break;
// The stored memory type must be the same.
if (Index->getMemoryVT() != MemVT)
break;
// We do not allow unaligned stores because we want to prevent stores from
// partially overriding one another.
if (Index->getAlignment()*8 != MemVT.getSizeInBits())
break;
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
// Find the next memory operand in the chain. If the next operand in the
// chain is a store, then move up and continue the scan with the next
// memory operand. If the next operand is a load, save it and use alias
// information to check if it interferes with anything.
SDNode *NextInChain = Index->getChain().getNode();
while (1) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
// We found a store node. Use it for the next iteration.
Index = STn;
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
if (Ldn->isVolatile()) {
Index = NULL;
break;
}
// Save the load node for later. Continue the scan.
AliasLoadNodes.push_back(Ldn);
NextInChain = Ldn->getChain().getNode();
continue;
} else {
Index = NULL;
break;
}
}
}
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
// Sort the memory operands according to their distance from the base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
ConsecutiveMemoryChainSorter());
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
unsigned LastConsecutiveStore = 0;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
// Check that the addresses are consecutive starting from the second
// element in the list of stores.
if (i > 0) {
int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
}
bool Alias = false;
// Check if this store interferes with any of the loads that we found.
for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
Alias = true;
break;
}
// We found a load that aliases with this store. Stop the sequence.
if (Alias)
break;
// Mark this node as useful.
LastConsecutiveStore = i;
}
// The node with the lowest store address.
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
// Store the constants into memory as one consecutive store.
if (!IsLoadSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = St->getValue();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
NonZero |= !C->isNullValue();
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
NonZero |= !C->getConstantFPValue()->isNullValue();
} else {
// Non constant.
break;
}
// Find a legal type for the constant store.
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
if (TLI.isTypeLegal(StoreTy))
LastLegalType = i+1;
// Or check whether a truncstore is legal.
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
LastLegalType = i+1;
}
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
if (TLI.isTypeLegal(Ty))
LastLegalVectorType = i + 1;
}
// We only use vectors if the constant is known to be zero and the
// function is not marked with the noimplicitfloat attribute.
if (NonZero || NoVectors)
LastLegalVectorType = 0;
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
return false;
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
// Make sure we have something to merge.
if (NumElem < 2)
return false;
unsigned EarliestNodeUsed = 0;
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
// earliest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
EarliestNodeUsed = i;
}
// The earliest Node in the DAG.
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
if (UseVector) {
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
StoredVal = DAG.getConstant(0, Ty);
} else {
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
APInt StoreInt(StoreBW, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = TLI.isLittleEndian();
for (unsigned i = 0; i < NumElem ; ++i) {
unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue().zext(StoreBW);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
} else {
assert(false && "Invalid constant element type");
}
}
// Create the new Load and Store operations.
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
StoredVal = DAG.getConstant(StoreInt, StoreTy);
}
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
// Replace the first store with the new store
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
if (StoreNodes[i].MemNode == EarliestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// ReplaceAllUsesWith will replace all uses that existed when it was
// called, but graph optimizations may cause new ones to appear. For
// example, the case in pr14333 looks like
//
// St's chain -> St -> another store -> X
//
// And the only difference from St to the other store is the chain.
// When we change its chain to be St's chain they become identical,
// get CSEed and the net result is that X is now a use of St.
// Since we know that St is redundant, just iterate.
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
removeFromWorkList(St);
DAG.DeleteNode(St);
}
return true;
}
// Below we handle the case of multiple consecutive stores that
// come from multiple consecutive loads. We merge them into a single
// wide load and a single wide store.
// Look for load nodes which are used by the stored values.
SmallVector<MemOpLink, 8> LoadNodes;
// Find acceptable loads. Loads must have the same chain (token factor),
// must not be extending, volatile, or indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
if (!Ld) break;
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
break;
// Check that the alignment is the same as the stores.
if (Ld->getAlignment() != St->getAlignment())
break;
// The memory operands must not be volatile.
if (Ld->isVolatile() || Ld->isIndexed())
break;
// We do not accept ext loads.
if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
break;
// The stored memory type must be the same.
if (Ld->getMemoryVT() != MemVT)
break;
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
// If this is not the first ptr that we check.
if (LdBasePtr.Base.getNode()) {
// The base ptr must be the same.
if (!LdPtr.equalBaseIndex(LdBasePtr))
break;
} else {
// Check that all other base pointers are the same as this one.
LdBasePtr = LdPtr;
}
// We found a potential memory operand to merge.
LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
}
if (LoadNodes.size() < 2)
return false;
// Scan the memory operations on the chain and find the first non-consecutive
// load memory address. This variable holds the index into the load node
// array.
unsigned LastConsecutiveLoad = 0;
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 0;
unsigned LastLegalIntegerType = 0;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = LoadNodes[0].MemNode->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
if (TLI.isTypeLegal(StoreTy))
LastLegalVectorType = i + 1;
// Find a legal type for the integer store.
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
if (TLI.isTypeLegal(StoreTy))
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
LastLegalIntegerType = i+1;
}
}
// Only use vector types if the vector type is larger than the integer type.
// If they are the same, use integers.
bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
// We add +1 here because the LastXXX variables hold indices while
// NumElem is a count.
unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2)
return false;
// The earliest Node in the DAG.
unsigned EarliestNodeUsed = 0;
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
for (unsigned i=1; i<NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
// earliest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
EarliestNodeUsed = i;
}
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
} else {
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
FirstLoad->getChain(),
FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(),
false, false, false,
FirstLoad->getAlignment());
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false,
FirstInChain->getAlignment());
// Replace one of the loads with the new load.
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
// Remove the rest of the load chains.
for (unsigned i = 1; i < NumElem ; ++i) {
// Replace all chain users of the old load nodes with the chain of the new
// load node.
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
}
// Replace the first store with the new store.
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
if (StoreNodes[i].MemNode == EarliestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
removeFromWorkList(St);
DAG.DeleteNode(St);
}
return true;
}
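// For illustration (editor's example, hypothetical operands, little-endian):
// four consecutive constant byte stores
//   store i8 1, %p; store i8 2, %p+1; store i8 3, %p+2; store i8 4, %p+3
// are merged into the single store
//   store i32 0x04030201, %p
// provided i32 is legal and the alignment checks above are satisfied.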
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign,
ST->getTBAAInfo());
}
// Turn 'store undef, Ptr' -> nothing.
if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
return Chain;
// Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
// NOTE: If the original store is volatile, this transform must not increase
// the number of stores. For example, on x86-32 an f64 can be stored in one
// processor operation but an i64 (which is not legal) requires two. So the
// transform should not be done in this case.
if (Value.getOpcode() != ISD::TargetConstantFP) {
SDValue Tmp;
switch (CFP->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
break;
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
break;
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
if (!ST->isVolatile() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment(), TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
Alignment, TBAAInfo);
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
St0, St1);
}
break;
}
}
}
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
ST->getTBAAInfo());
}
}
// Try transforming a pair of floating-point load / store ops to integer
// load / store ops.
SDValue NewST = TransformFPLoadStorePair(N);
if (NewST.getNode())
return NewST;
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
SDValue ReplStore;
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
ST->getMemoryVT(), ST->getMemOperand());
} else {
ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
ST->getMemOperand());
}
// Create token to keep both nodes around.
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplStore);
// Make sure the new and old chains are cleaned up.
AddToWorkList(Token.getNode());
// Don't add users to work list.
return CombineTo(N, Token, false);
}
}
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
Value.getValueType().isInteger()) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
SDValue Shorter =
GetDemandedBits(Value,
APInt::getLowBitsSet(
Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getScalarType().getSizeInBits()));
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
Ptr, ST->getMemoryVT(), ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
APInt::getLowBitsSet(
Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getScalarType().getSizeInBits())))
return SDValue(N, 0);
}
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && !ST->isVolatile() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
// The store is dead, remove it.
return Chain;
}
}
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
&& Value.getNode()->hasOneUse() && ST->isUnindexed() &&
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
if (!LegalTypes) {
bool EverChanged = false;
do {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
bool Changed = MergeConsecutiveStores(ST);
EverChanged |= Changed;
if (!Changed) break;
} while (ST->getOpcode() != ISD::DELETED_NODE);
if (EverChanged)
return SDValue(N, 0);
}
return ReduceLoadOpStoreWidth(N);
}
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
SDLoc dl(N);
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.getOpcode() == ISD::UNDEF)
return InVec;
EVT VT = InVec.getValueType();
// If we can't generate a legal BUILD_VECTOR, exit
if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return SDValue();
// Check that we know which element is being inserted
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
SmallVector<SDValue, 8> Ops;
// Do not combine these two vectors if the output vector will not replace
// the input vector.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.getOpcode() == ISD::UNDEF) {
unsigned NElts = VT.getVectorNumElements();
Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
} else {
return SDValue();
}
// Insert the element
if (Elt < Ops.size()) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
if (InVal.getValueType() != OpVT)
InVal = OpVT.bitsGT(InVal.getValueType()) ?
DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
Ops[Elt] = InVal;
}
// Return the new vector
return DAG.getNode(ISD::BUILD_VECTOR, dl,
VT, &Ops[0], Ops.size());
}
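// For illustration (editor's example, hypothetical operands): with a
// constant index,
//   insert_vector_elt (build_vector a, b, c, d), x, 2
// is folded into
//   build_vector a, b, x, d
// with x any-extended or truncated if its type differs from the other
// operands.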
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
EVT VT = InVec.getValueType();
EVT NVT = N->getValueType(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = InVec.getOperand(0);
if (InOp.getValueType() != NVT) {
assert(InOp.getValueType().isInteger() && NVT.isInteger());
return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
}
return InOp;
}
SDValue EltNo = N->getOperand(1);
bool ConstEltNo = isa<ConstantSDNode>(EltNo);
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
// patterns. For example, on AVX this lets us extract elements from a wide
// vector without using extract_subvector.
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
&& ConstEltNo && !LegalOperations) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
// Find the new index to extract from.
int OrigElt = SVOp->getMaskElt(Elt);
// Extracting an undef index is undef.
if (OrigElt == -1)
return DAG.getUNDEF(NVT);
// Select the right vector half to extract from.
if (OrigElt < NumElem) {
InVec = InVec->getOperand(0);
} else {
InVec = InVec->getOperand(1);
OrigElt -= NumElem;
}
EVT IndexTy = TLI.getVectorIdxTy();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
InVec, DAG.getConstant(OrigElt, IndexTy));
}
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
// If the result of load has to be truncated, then it's not necessarily
// profitable.
if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
return SDValue();
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
InVec = InVec.getOperand(0);
ExtVT = BCVT.getVectorElementType();
NewLoad = true;
}
LoadSDNode *LN0 = NULL;
const ShuffleVectorSDNode *SVN = NULL;
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
return SDValue();
// Select the input vector, guarding against an out-of-range extract index.
unsigned NumElems = VT.getVectorNumElements();
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
InVec = InVec.getOperand(0);
}
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
// Make sure we found a non-volatile load and the extractelement is
// the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
if (Elt == -1)
return DAG.getUNDEF(LVT);
unsigned Align = LN0->getAlignment();
if (NewLoad) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
TLI.getDataLayout()
->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
Align = NewAlign;
}
SDValue NewPtr = LN0->getBasePtr();
unsigned PtrOff = 0;
if (Elt) {
PtrOff = LVT.getSizeInBits() * Elt / 8;
EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
DAG.getConstant(PtrOff, PtrType));
}
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
// Use ReplaceAllUsesOfValuesWith to do the replacement.
// Note that this replacement assumes that the extractelement is the only
// use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway.
SDValue Load;
SDValue Chain;
if (NVT.bitsGT(LVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
? ISD::ZEXTLOAD : ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
LVT, LN0->isVolatile(), LN0->isNonTemporal(),
Align, LN0->getTBAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->isInvariant(), Align, LN0->getTBAAInfo());
Chain = Load.getValue(1);
if (NVT.bitsLT(LVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
else
Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
}
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorkList(Load.getNode());
AddUsersToWorkList(Load.getNode()); // Add users too
// Make sure to revisit this node to clean it up; it will usually be dead.
AddToWorkList(N);
return SDValue(N, 0);
}
return SDValue();
}
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// We perform this optimization post type-legalization because
// the type-legalizer often scalarizes integer-promoted vectors.
// Performing it earlier may create bit-casts which
// will be type-legalized to complex code sequences.
// We perform this optimization only before the operation legalizer because we
// may introduce illegal operations.
if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
return SDValue();
unsigned NumInScalars = N->getNumOperands();
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
// optimizations. We do not handle sign-extend because we can't fill the sign
// using shuffles.
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
// Abort if the element is not an extension.
if (!ZeroExt && !AnyExt) {
SourceType = MVT::Other;
break;
}
// The input is a ZeroExt or AnyExt. Check the original type.
EVT InTy = In.getOperand(0).getValueType();
// Check that all of the widened source types are the same.
if (SourceType == MVT::Other)
// First time.
SourceType = InTy;
else if (InTy != SourceType) {
// Multiple incoming types. Abort.
SourceType = MVT::Other;
break;
}
// Check if all of the extends are ANY_EXTENDs.
AllAnyExt &= AnyExt;
}
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
EVT OutScalarTy = VT.getScalarType();
bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
if (!ValidTypes)
return SDValue();
bool isLE = TLI.isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
DAG.getConstant(0, SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
// Populate the new build_vector
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Cast = N->getOperand(i);
assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
Cast.getOpcode() == ISD::ZERO_EXTEND ||
Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
SDValue In;
if (Cast.getOpcode() == ISD::UNDEF)
In = DAG.getUNDEF(SourceType);
else
In = Cast->getOperand(0);
unsigned Index = isLE ? (i * ElemRatio) :
(i * ElemRatio + (ElemRatio - 1));
assert(Index < Ops.size() && "Invalid index");
Ops[Index] = In;
}
// The type of the new BUILD_VECTOR node.
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
"Invalid vector size");
// Check if the new vector type is legal.
if (!isTypeLegal(VecVT)) return SDValue();
// Make the new BUILD_VECTOR.
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorkList(BV.getNode());
// Bitcast to the desired type.
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
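// If every defined operand of a BUILD_VECTOR is a [su]int_to_fp conversion
// from the same integer type, fold the conversion through the BUILD_VECTOR,
// e.g. (illustrative):
//   (v4f32 build_vector (sint_to_fp %a), (sint_to_fp %b),
//                       (sint_to_fp %c), (sint_to_fp %d))
// becomes
//   (v4f32 sint_to_fp (v4i32 build_vector %a, %b, %c, %d))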
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumInScalars = N->getNumOperands();
SDLoc dl(N);
EVT SrcVT = MVT::Other;
unsigned Opcode = ISD::DELETED_NODE;
unsigned NumDefs = 0;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
unsigned Opc = In.getOpcode();
if (Opc == ISD::UNDEF)
continue;
// All defined scalar values must be floats converted from integers; record
// the conversion opcode the first time we see one.
if (Opcode == ISD::DELETED_NODE &&
(Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
Opcode = Opc;
}
if (Opc != Opcode)
return SDValue();
EVT InVT = In.getOperand(0).getValueType();
// If the scalar values have different source types, bail out. This
// restriction is chosen to keep the handling of integer types simple.
if (SrcVT == MVT::Other)
SrcVT = InVT;
if (SrcVT != InVT)
return SDValue();
NumDefs++;
}
// If the vector has just one element defined, it's not worth folding it
// into a vectorized one.
if (NumDefs < 2)
return SDValue();
assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
&& "Should only handle conversion from integer to float.");
assert(SrcVT != MVT::Other && "Cannot determine source type!");
EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
if (In.getOpcode() == ISD::UNDEF)
Opnds.push_back(DAG.getUNDEF(SrcVT));
else
Opnds.push_back(In.getOperand(0));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
&Opnds[0], Opnds.size());
AddToWorkList(BV.getNode());
return DAG.getNode(Opcode, dl, VT, BV);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
SDLoc dl(N);
EVT VT = N->getValueType(0);
// A vector built entirely of undefs is undef.
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
SDValue V = reduceBuildVecExtToExtBuildVec(N);
if (V.getNode())
return V;
V = reduceBuildVecConvertToConvertBuildVec(N);
if (V.getNode())
return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
// May only combine to shuffle after legalize if shuffle is legal.
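// For example (illustrative, %A and %B both v4i32):
//   (v4i32 build_vector (extract_elt %A, 0), (extract_elt %A, 1),
//                       (extract_elt %B, 0), (extract_elt %B, 1))
// becomes
//   (vector_shuffle %A, %B, <0, 1, 4, 5>)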
if (LegalOperations &&
!TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
SDValue VecIn1, VecIn2;
for (unsigned i = 0; i != NumInScalars; ++i) {
// Ignore undef inputs.
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// If this input is something other than an EXTRACT_VECTOR_ELT with a
// constant index, bail out.
if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
VecIn1 = VecIn2 = SDValue(0, 0);
break;
}
// We allow up to two distinct input vectors.
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
if (VecIn1.getNode() == 0) {
VecIn1 = ExtractedFromVec;
} else if (VecIn2.getNode() == 0) {
VecIn2 = ExtractedFromVec;
} else {
// Too many inputs.
VecIn1 = VecIn2 = SDValue(0, 0);
break;
}
}
// If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
Mask.push_back(-1);
continue;
}
// If extracting from the first vector, just use the index directly.
SDValue Extract = N->getOperand(i);
SDValue ExtVal = Extract.getOperand(1);
if (Extract.getOperand(0) == VecIn1) {
unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (ExtIndex > VT.getVectorNumElements())
return SDValue();
Mask.push_back(ExtIndex);
continue;
}
// Otherwise, use InIdx + VecSize
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
Mask.push_back(Idx+NumInScalars);
}
// We can't generate a shuffle node with mismatched input and output types.
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
// We don't support shuffling between TWO values of different types.
if (VecIn2.getNode() != 0)
return SDValue();
// We only support widening of vectors which are half the size of the
// output registers. For example XMM->YMM widening on X86 with AVX.
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
// If the input vector type has a different base type to the output
// vector type, bail out.
if (VecIn1.getValueType().getVectorElementType() !=
VT.getVectorElementType())
return SDValue();
// Widen the input vector by adding undef values.
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
}
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
// Check that we were able to transform all incoming values to the same
// type.
if (VecIn2.getValueType() != VecIn1.getValueType() ||
VecIn1.getValueType() != VT)
return SDValue();
// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
Ops[1] = VecIn2;
return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
}
return SDValue();
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
// inputs come from at most two distinct vectors, turn this into a shuffle
// node.
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
return N->getOperand(0);
// Check if all of the operands are undefs.
EVT VT = N->getValueType(0);
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
// Optimize concat_vectors where one of the vectors is undef.
if (N->getNumOperands() == 2 &&
N->getOperand(1)->getOpcode() == ISD::UNDEF) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
// Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
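// For example (illustrative, assuming v2i64 is legal on the target):
//   (v4i32 concat_vectors (v2i32 bitcast (i64 %x)), undef)
// becomes
//   (v4i32 bitcast (v2i64 scalar_to_vector (i64 %x)))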
if (In->getOpcode() == ISD::BITCAST &&
!In->getOperand(0)->getValueType(0).isVector()) {
SDValue Scalar = In->getOperand(0);
EVT SclTy = Scalar->getValueType(0);
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
VT.getSizeInBits() / SclTy.getSizeInBits());
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
SDLoc dl = SDLoc(N);
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
return DAG.getNode(ISD::BITCAST, dl, VT, Res);
}
}
// Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
// nodes often generate nop CONCAT_VECTORS nodes.
// Scan the CONCAT_VECTORS operands and look for CONCAT operations that
// place the incoming vectors at the exact same location.
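// For example (illustrative, %V and the result both v4i32):
//   (concat_vectors (extract_subvector %V, 0), (extract_subvector %V, 2))
// reads both halves of %V from their original positions, so the whole
// concat folds to %V itself.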
SDValue SingleSource = SDValue();
unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
if (Op.getOpcode() == ISD::UNDEF)
continue;
// Check if this is the identity extract:
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return SDValue();
// Find the single incoming vector for the extract_subvector.
if (SingleSource.getNode()) {
if (Op.getOperand(0) != SingleSource)
return SDValue();
} else {
SingleSource = Op.getOperand(0);
// Check that the source type is the same as the type of the result.
// If not, this concat may extend the vector, so we cannot
// optimize it away.
if (SingleSource.getValueType() != N->getValueType(0))
return SDValue();
}
unsigned IdentityIndex = i * PartNumElem;
ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// The extract index must be constant.
if (!CS)
return SDValue();
// Check that we are reading from the identity index.
if (CS->getZExtValue() != IdentityIndex)
return SDValue();
}
if (SingleSource.getNode())
return SingleSource;
return SDValue();
}
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
if (V->getOpcode() == ISD::CONCAT_VECTORS) {
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
// Only operand 0 is checked, as 'concat' assumes all inputs have the same
// type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
return V->getOperand(Idx / NumElems);
}
// Skip bitcasting
if (V->getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDLoc dl(N);
// Handle only the simple case where the vectors being inserted and
// extracted have the same type and are half the size of the larger vector.
EVT BigVT = V->getOperand(0).getValueType();
EVT SmallVT = V->getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
// Only handle cases where both indexes are constants with the same type.
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
ExtIdx->getValueType(0).getSizeInBits() <= 64) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
DAG.getNode(ISD::BITCAST, dl,
N->getOperand(0).getValueType(),
V->getOperand(0)), N->getOperand(1));
}
}
return SDValue();
}
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
SmallVector<SDValue, 4> Ops;
EVT ConcatVT = N0.getOperand(0).getValueType();
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector.
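// For example (illustrative, each concat operand v2i32):
//   (vector_shuffle (concat_vectors %A, %B), (concat_vectors %C, %D),
//                   <2, 3, 0, 1>)
// copies whole subvectors, so it becomes (concat_vectors %B, %A).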
for (unsigned I = 0; I != NumConcats; ++I) {
// Make sure we're dealing with a copy.
unsigned Begin = I * NumElemsPerConcat;
bool AllUndef = true, NoUndef = true;
for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
if (SVN->getMaskElt(J) >= 0)
AllUndef = false;
else
NoUndef = false;
}
if (NoUndef) {
if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
return SDValue();
for (unsigned J = 1; J != NumElemsPerConcat; ++J)
if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
return SDValue();
unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
if (FirstElt < N0.getNumOperands())
Ops.push_back(N0.getOperand(FirstElt));
else
Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
} else if (AllUndef) {
Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
} else { // Mixed with general masks and undefs, can't do optimization.
return SDValue();
}
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
Ops.size());
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
// Canonicalize shuffle undef, undef -> undef
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
return DAG.getUNDEF(VT);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
if (N0 == N1) {
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= (int)NumElts) Idx -= NumElts;
NewMask.push_back(Idx);
}
return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
&NewMask[0]);
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.getOpcode() == ISD::UNDEF) {
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= 0) {
if (Idx >= (int)NumElts)
Idx -= NumElts;
else
Idx = -1; // remove reference to lhs
}
NewMask.push_back(Idx);
}
return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
&NewMask[0]);
}
// Remove references to rhs if it is undef
if (N1.getOpcode() == ISD::UNDEF) {
bool Changed = false;
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= (int)NumElts) {
Idx = -1;
Changed = true;
}
NewMask.push_back(Idx);
}
if (Changed)
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
}
// If it is a splat, check if the argument vector is another splat or a
// build_vector with all scalar elements the same.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
V = ConvInput.getNode();
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
assert(V->getNumOperands() == NumElts &&
"BUILD_VECTOR has wrong number of operands");
SDValue Base;
bool AllSame = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
Base = V->getOperand(i);
break;
}
}
// Splat of <u, u, u, u>, return <u, u, u, u>
if (!Base.getNode())
return N0;
for (unsigned i = 0; i != NumElts; ++i) {
if (V->getOperand(i) != Base) {
AllSame = false;
break;
}
}
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
}
}
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.getOpcode() == ISD::UNDEF ||
(N1.getOpcode() == ISD::CONCAT_VECTORS &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
SDValue V = partitionShuffleOfConcats(N, DAG);
if (V.getNode())
return V;
}
// If this shuffle node is simply a swizzle of another shuffle node,
// and it reverses the swizzle of the previous shuffle then we can
// optimize shuffle(shuffle(x, undef), undef) -> x.
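// For example (illustrative, v4i32 masks):
//   shuffle (shuffle %x, undef, <2, 3, 0, 1>), undef, <2, 3, 0, 1>
// folds to %x, because composing the two masks yields the identity
// mask <0, 1, 2, 3>.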
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// Shuffle nodes can only reverse shuffles with a single non-undef value.
if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
return SDValue();
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
assert(Idx < (int)NumElts && "Index references undef operand");
// Next, this index comes from the first value, which is the incoming
// shuffle. Adopt the incoming index.
if (Idx >= 0)
Idx = OtherSV->getMaskElt(Idx);
// The combined shuffle must map each index to itself.
if (Idx >= 0 && (unsigned)Idx != i)
return SDValue();
}
return OtherSV->getOperand(0);
}
return SDValue();
}
/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
/// transform an AND to a vector_shuffle with the destination vector and a
/// zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) {
if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<int, 8> Indices;
unsigned NumElts = RHS.getNumOperands();
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
Indices.push_back(NumElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
EVT RVT = RHS.getValueType();
if (!TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
EVT EltVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
RVT, &ZeroOps[0], ZeroOps.size());
LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
}
return SDValue();
}
/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
if (Shuffle.getNode()) return Shuffle;
// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
// this operation.
if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
RHS.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
SDValue LHSOp = LHS.getOperand(i);
SDValue RHSOp = RHS.getOperand(i);
// If these two elements can't be folded, bail out.
if ((LHSOp.getOpcode() != ISD::UNDEF &&
LHSOp.getOpcode() != ISD::Constant &&
LHSOp.getOpcode() != ISD::ConstantFP) ||
(RHSOp.getOpcode() != ISD::UNDEF &&
RHSOp.getOpcode() != ISD::Constant &&
RHSOp.getOpcode() != ISD::ConstantFP))
break;
// Can't fold divide by zero.
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
N->getOpcode() == ISD::FDIV) {
if ((RHSOp.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
(RHSOp.getOpcode() == ISD::ConstantFP &&
cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
break;
}
EVT VT = LHSOp.getValueType();
EVT RVT = RHSOp.getValueType();
if (RVT != VT) {
// Integer BUILD_VECTOR operands may have types larger than the element
// size (e.g., when the element type is not legal). Prior to type
// legalization, the types may not match between the two BUILD_VECTORS.
// Truncate one of the operands to make them match.
if (RVT.getSizeInBits() > VT.getSizeInBits()) {
RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
} else {
LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
VT = RVT;
}
}
SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
FoldOp.getOpcode() != ISD::Constant &&
FoldOp.getOpcode() != ISD::ConstantFP)
break;
Ops.push_back(FoldOp);
AddToWorkList(FoldOp.getNode());
}
if (Ops.size() == LHS.getNumOperands())
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
LHS.getValueType(), &Ops[0], Ops.size());
}
return SDValue();
}
/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVUnaryOp only works on vectors!");
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// Operand is a BUILD_VECTOR node, see if we can constant fold it.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
SDValue Op = N0.getOperand(i);
if (Op.getOpcode() != ISD::UNDEF &&
Op.getOpcode() != ISD::ConstantFP)
break;
EVT EltVT = Op.getValueType();
SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
if (FoldOp.getOpcode() != ISD::UNDEF &&
FoldOp.getOpcode() != ISD::ConstantFP)
break;
Ops.push_back(FoldOp);
AddToWorkList(FoldOp.getNode());
}
if (Ops.size() != N0.getNumOperands())
return SDValue();
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
N0.getValueType(), &Ops[0], Ops.size());
}
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If we got a simplified select_cc node back from SimplifySelectCC, then
// break it down into a new SETCC node, and a new SELECT node, and then return
// the SELECT node, since we were called with a SELECT node.
if (SCC.getNode()) {
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
SCC.getOperand(4));
AddToWorkList(SETCC.getNode());
return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
SCC.getOperand(2), SCC.getOperand(3), SETCC);
}
return SCC;
}
return SDValue();
}
/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
/// are the two values being selected between, see if we can simplify the
/// select. Callers of this should assume that TheSelect is deleted if this
/// returns true. As such, they should return the appropriate thing (e.g. the
/// node) back to the top-level of the DAG combiner loop to avoid it being
/// looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
// If this is a select from two identical things, try to pull the operation
// through the select.
if (LHS.getOpcode() != RHS.getOpcode() ||
!LHS.hasOneUse() || !RHS.hasOneUse())
return false;
// If this is a load and the token chain is identical, replace the select
// of two loads with a load through a select of the address to load from.
// This triggers in things like "select bool X, 10.0, 123.0" after the FP
// constants have been dropped into the constant pool.
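// For example (illustrative, with @CPI0/@CPI1 standing for two constant
// pool addresses):
//   (select %cond, (load @CPI0), (load @CPI1))
// becomes
//   (load (select %cond, @CPI0, @CPI1))
// leaving a single load behind.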
if (LHS.getOpcode() == ISD::LOAD) {
LoadSDNode *LLD = cast<LoadSDNode>(LHS);
LoadSDNode *RLD = cast<LoadSDNode>(RHS);
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
(LLD->getExtensionType() != RLD->getExtensionType() &&
// The only exception is if one of the extensions is anyext.
LLD->getExtensionType() != ISD::EXTLOAD &&
RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
RLD->getPointerInfo().getAddrSpace() != 0 ||
!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
LLD->getBasePtr().getValueType()))
return false;
// Check that the select condition doesn't reach either load. If so,
// folding this will induce a cycle into the DAG. If not, this is safe to
// xform, so create a select of the addresses.
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
SDNode *CondNode = TheSelect->getOperand(0).getNode();
if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
(RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
return false;
// The loads must not depend on one another.
if (LLD->isPredecessorOf(RLD) ||
RLD->isPredecessorOf(LLD))
return false;
Addr = DAG.getSelect(SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
RLD->getBasePtr());
} else { // Otherwise SELECT_CC
SDNode *CondLHS = TheSelect->getOperand(0).getNode();
SDNode *CondRHS = TheSelect->getOperand(1).getNode();
if ((LLD->hasAnyUseOfValue(1) &&
(LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
(RLD->hasAnyUseOfValue(1) &&
(RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0),
TheSelect->getOperand(1),
LLD->getBasePtr(), RLD->getBasePtr(),
TheSelect->getOperand(4));
}
SDValue Load;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
Load = DAG.getLoad(TheSelect->getValueType(0),
SDLoc(TheSelect),
// FIXME: Discards pointer and TBAA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
LLD->isInvariant(), LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
SDLoc(TheSelect),
TheSelect->getValueType(0),
// FIXME: Discards pointer and TBAA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->getMemoryVT(), LLD->isVolatile(),
LLD->isNonTemporal(), LLD->getAlignment());
}
// Users of the select now use the result of the load.
CombineTo(TheSelect, Load);
// Users of the old loads now use the new load's chain. We know the
// old-load value is dead now.
CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
return true;
}
return false;
}
/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3,
ISD::CondCode CC, bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, DL, false);
if (SCC.getNode()) AddToWorkList(SCC.getNode());
ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
// fold select_cc true, x, y -> x
if (SCCC && !SCCC->isNullValue())
return N2;
// fold select_cc false, x, y -> y
if (SCCC && SCCC->isNullValue())
return N3;
// Check to see if we can simplify the select into an fabs node
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
// Allow either -0.0 or 0.0
if (CFP->getValueAPF().isZero()) {
// select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
N0 == N2 && N3.getOpcode() == ISD::FNEG &&
N2 == N3.getOperand(0))
return DAG.getNode(ISD::FABS, DL, VT, N0);
// select (setl[te] X, +/-0.0), fneg(X), X -> fabs
if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
N0 == N3 && N2.getOpcode() == ISD::FNEG &&
N2.getOperand(0) == N3)
return DAG.getNode(ISD::FABS, DL, VT, N3);
}
}
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
// it replaces two constant pool loads with one. We only do this if the FP
// type is known to be legal, because if it isn't, then we are before legalize
// types and we want the other legalization to happen first (e.g. to avoid
// messing with soft float), and only if the ConstantFP is not legal, because
// if it is legal, we may not need to store the FP constant in a constant pool.
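// A sketch of the DAG built below (illustrative, assuming f32 constants):
//   %tmp = constant pool entry holding the array { 2.0f, 1.0f }
//   %off = (select (setcc %a, %b, cond), 4, 0)
//   %res = (load (add %tmp, %off))
// The false value is stored first, so a true condition selects offset 4,
// i.e. the second array element.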
if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
if (TLI.isTypeLegal(N2.getValueType()) &&
(TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
TargetLowering::Legal) &&
// If both constants have multiple uses, then we won't need to do an
// extra load, they are likely around in registers for other users.
(TV->hasOneUse() || FV->hasOneUse())) {
Constant *Elts[] = {
const_cast<ConstantFP*>(FV->getConstantFPValue()),
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
Type *FPTy = Elts[0]->getType();
const DataLayout &TD = *TLI.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
TD.getPrefTypeAlignment(FPTy));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
// Get the offsets to the 0 and 1 element of the array so that we can
// select between them.
SDValue Zero = DAG.getIntPtrConstant(0);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize);
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
N0, N1, CC);
AddToWorkList(Cond.getNode());
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
Cond, One, Zero);
AddToWorkList(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
}
}
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
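// For example (illustrative, i32): (select_cc setlt %x, 0, %a, 0) becomes
// (and (sra %x, 31), %a), since the arithmetic shift produces all-ones
// exactly when %x is negative.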
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
EVT AType = N2.getValueType();
if (XType.bitsGE(AType)) {
// (and (sra X, size(X)-1), A) -> "and (srl X, C2), A" iff A is a
// single-bit constant.
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
unsigned ShCtV = N2C->getAPIntValue().logBase2();
ShCtV = XType.getSizeInBits()-ShCtV-1;
SDValue ShCt = DAG.getConstant(ShCtV,
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
AddToWorkList(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
AddToWorkList(Shift.getNode());
}
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
AddToWorkList(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
AddToWorkList(Shift.getNode());
}
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
}
// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
// where y has a single bit set.
// In plain terms, we can turn the SELECT_CC into an AND
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
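// For example (illustrative, i32 with y == 8, i.e. bit 3):
//   (select_cc seteq (and %x, 8), 0, 0, %A)
// becomes
//   (and (sra (shl %x, 28), 31), %A)
// since the shifts smear bit 3 of %x across the whole register.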
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
N0->getValueType(0) == VT &&
N1C && N1C->isNullValue() &&
N2C && N2C->isNullValue()) {
SDValue AndLHS = N0->getOperand(0);
ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
APInt AndMask = ConstAndRHS->getAPIntValue();
SDValue ShlAmt =
DAG.getConstant(AndMask.countLeadingZeros(),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is either
// all-ones, or zero.
SDValue ShrAmt =
DAG.getConstant(AndMask.getBitWidth()-1,
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
}
}
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents(N0.getValueType().isVector()) ==
TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
if (NotExtCompare && N2C->getAPIntValue() == 1)
return SDValue();
// Get a SetCC of the condition
// NOTE: Don't create a SETCC if it's not legal on this target.
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC,
LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
SDValue Temp, SCC;
// cast from setcc result type to select result type
if (LegalTypes) {
SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
N0, N1, CC);
if (N2.getValueType().bitsLT(SCC.getValueType()))
Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
N2.getValueType());
else
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
N2.getValueType(), SCC);
} else {
SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
N2.getValueType(), SCC);
}
AddToWorkList(SCC.getNode());
AddToWorkList(Temp.getNode());
if (N2C->getAPIntValue() == 1)
return Temp;
// shl setcc result by log2 n2c
return DAG.getNode(
ISD::SHL, DL, N2.getValueType(), Temp,
DAG.getConstant(N2C->getAPIntValue().logBase2(),
getShiftAmountTy(Temp.getValueType())));
}
}
// Check to see if this is the equivalent of setcc
// FIXME: Turn all of these into setcc if setcc is legal;
// otherwise, go ahead with the folds.
if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
EVT XType = N0.getValueType();
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
if (Res.getValueType() != VT)
Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
return Res;
}
// fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::CTLZ, XType))) {
SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
DAG.getConstant(Log2_32(XType.getSizeInBits()),
getShiftAmountTy(Ctlz.getValueType())));
}
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
XType, DAG.getConstant(0, XType), N0);
SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
return DAG.getNode(ISD::SRL, DL, XType,
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy(XType)));
}
// fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
}
}
// Check to see if this is an integer abs.
// select_cc setg[te] X, 0, X, -X ->
// select_cc setgt X, -1, X, -X ->
// select_cc setl[te] X, 0, -X, X ->
// select_cc setlt X, 1, -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
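// For example (illustrative, i32): with X = -5, Y = sra(X, 31) = -1, so
// add(X, Y) = -6 and xor(-6, -1) = 5 = |X|; for X >= 0, Y = 0 and the
// sequence leaves X unchanged.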
if (N1C) {
ConstantSDNode *SubC = NULL;
if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
(N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
(N1C->isOne() && CC == ISD::SETLT)) &&
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
EVT XType = N0.getValueType();
if (SubC && SubC->isNullValue() && XType.isInteger()) {
SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
XType, N0, Shift);
AddToWorkList(Shift.getNode());
AddToWorkList(Add.getNode());
return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
}
}
return SDValue();
}
/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans) {
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
std::vector<SDNode*> Built;
SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
AddToWorkList(*ii);
return S;
}
/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
std::vector<SDNode*> Built;
SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
AddToWorkList(*ii);
return S;
}
/// FindBaseOffset - Return true if base is a frame index, which is known not
/// to alias with anything but itself. Provides base object and offset as
/// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
Base = Ptr; Offset = 0; GV = 0; CV = 0;
// If it's adding a simple constant, then integrate the offset.
if (Base.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
Base = Base.getOperand(0);
Offset += C->getZExtValue();
}
}
// Return the underlying GlobalValue, and update the Offset. Return false
// for GlobalAddressSDNode since the same GlobalAddress may be represented
// by multiple nodes with different offsets.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
GV = G->getGlobal();
Offset += G->getOffset();
return false;
}
// Return the underlying Constant value, and update the Offset. Return false
// for ConstantSDNodes since the same constant pool entry may be represented
// by multiple nodes with different offsets.
if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
: (const void *)C->getConstVal();
Offset += C->getOffset();
return false;
}
// If it's any of the following then it can't alias with anything but itself.
return isa<FrameIndexSDNode>(Base);
}
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2, bool IsVolatile2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
// If they are both volatile then they cannot be reordered.
if (IsVolatile1 && IsVolatile2) return true;
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
const GlobalValue *GV1, *GV2;
const void *CV1, *CV2;
bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
// If they have the same base address, then check to see if they overlap.
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
// It is possible for different frame indices to alias each other, mostly
// when tail call optimization reuses return address slots for arguments.
// To catch this case, look up the actual index of frame indices to compute
// the real alias relationship.
if (isFrameIndex1 && isFrameIndex2) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
}
// Otherwise, if we know what the bases are, and they aren't identical, then
// we know they cannot alias.
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
return false;
// If we know that SrcValue1 and SrcValue2 have relatively large alignment
// compared to the size and offset of the access, we may be able to prove they
// do not alias. This check is conservative for now to catch cases created by
// splitting vector types.
if ((SrcValueAlign1 == SrcValueAlign2) &&
(SrcValueOffset1 != SrcValueOffset2) &&
(Size1 == Size2) && (SrcValueAlign1 > Size1)) {
int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
// There is no overlap between these relatively aligned accesses of similar
// size, return no alias.
if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
return false;
}
bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
if (UseAA && SrcValue1 && SrcValue2) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
// Otherwise we have to assume they alias.
return true;
}
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
SDValue Ptr0, Ptr1;
int64_t Size0, Size1;
bool IsVolatile0, IsVolatile1;
const Value *SrcValue0, *SrcValue1;
int SrcValueOffset0, SrcValueOffset1;
unsigned SrcValueAlign0, SrcValueAlign1;
const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0);
FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0,
Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
}
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a nonvolatile load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size, bool &IsVolatile,
const Value *&SrcValue,
int &SrcValueOffset,
unsigned &SrcValueAlign,
const MDNode *&TBAAInfo) const {
LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
Ptr = LS->getBasePtr();
Size = LS->getMemoryVT().getSizeInBits() >> 3;
IsVolatile = LS->isVolatile();
SrcValue = LS->getSrcValue();
SrcValueOffset = LS->getSrcValueOffset();
SrcValueAlign = LS->getOriginalAlignment();
TBAAInfo = LS->getTBAAInfo();
return isa<LoadSDNode>(LS) && !IsVolatile;
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases) {
SmallVector<SDValue, 8> Chains; // List of chains to visit.
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
SDValue Ptr;
int64_t Size;
bool IsVolatile;
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
const MDNode *SrcTBAAInfo;
bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we find two aliases. If we've seen two aliases, assume we'll
// find more and revert to original chain since the xform is unlikely to be
// profitable.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > 6 || Aliases.size() == 2) {
Aliases.clear();
Aliases.push_back(OriginalChain);
break;
}
// Don't bother if we've been here before.
if (!Visited.insert(Chain.getNode()))
continue;
switch (Chain.getOpcode()) {
case ISD::EntryToken:
// Entry token is ideal chain operand, but handled in FindBetterChain.
break;
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for Chain.
SDValue OpPtr;
int64_t OpSize;
bool OpIsVolatile;
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
OpIsVolatile, OpSrcValue, OpSrcValueOffset,
OpSrcValueAlign,
OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
SrcValueAlign, SrcTBAAInfo,
OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
Chains.push_back(Chain.getOperand(0));
++Depth;
}
break;
}
case ISD::TokenFactor:
// We have to check each of the operands of the token factor for "small"
// token factors, so we queue them up. Adding the operands to the queue
// (stack) in reverse order maintains the original order and increases the
// likelihood that getNode will find a matching token factor (CSE).
if (Chain.getNumOperands() > 16) {
Aliases.push_back(Chain);
break;
}
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
++Depth;
break;
default:
// For all other instructions we will just have to take what we can get.
Aliases.push_back(Chain);
break;
}
}
}
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node).
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
// If no operands then chain to entry token.
if (Aliases.size() == 0)
return DAG.getEntryNode();
// If a single operand then chain to it. We don't need to revisit it.
if (Aliases.size() == 1)
return Aliases[0];
// Construct a custom tailored token factor.
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
&Aliases[0], Aliases.size());
}
// SelectionDAG::Combine - This is the entry point for the file.
//
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel) {
// Construct a DAGCombiner for this DAG and run it at the given level.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (revision 265924)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (revision 265925)
@@ -1,778 +1,779 @@
//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types. For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
namespace {
class VectorLegalizer {
SelectionDAG& DAG;
const TargetLowering &TLI;
bool Changed; // Keep track of whether anything changed
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
/// allows us to avoid legalizing the same thing more than once.
SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
// Adds a node to the translation cache
void AddLegalizedOperand(SDValue From, SDValue To) {
LegalizedNodes.insert(std::make_pair(From, To));
// If someone requests legalization of the new node, map it to itself.
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
}
// Legalizes the given node
SDValue LegalizeOp(SDValue Op);
// Assuming the node is legal, "legalize" the results
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
// Implements unrolling a VSETCC.
SDValue UnrollVSETCC(SDValue Op);
// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
// SINT_TO_FLOAT and SHR on vectors aren't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandSELECT(SDValue Op);
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
// Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
// isn't legal.
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
// original type.
SDValue PromoteVectorOp(SDValue Op);
// Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
// operand to the next size up.
SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
public:
bool Run();
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
};
bool VectorLegalizer::Run() {
// Before we start legalizing vector nodes, check if there are any vectors.
bool HasVectors = false;
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
// Check if the values of the nodes contain vectors. We don't need to check
// the operands because we are going to check their values at some point.
for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
J != E; ++J)
HasVectors |= J->isVector();
// If we found a vector node we can start the legalization.
if (HasVectors)
break;
}
// If this basic block has no vectors then no need to legalize vectors.
if (!HasVectors)
return false;
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
// could just start legalizing on the root and traverse the whole graph. In
// practice however, this causes us to run out of stack space on large basic
// blocks. To avoid this problem, compute an ordering of the nodes where each
// node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
LegalizeOp(SDValue(I, 0));
// Finally, it's possible the root changed. Get the new root.
SDValue OldRoot = DAG.getRoot();
assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
DAG.setRoot(LegalizedNodes[OldRoot]);
LegalizedNodes.clear();
// Remove dead nodes now.
DAG.RemoveDeadNodes();
return Changed;
}
SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
// Generic legalization: just pass the operand through.
for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
return Result.getValue(Op.getResNo());
}
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// Note that LegalizeOp may be reentered even from single-use nodes, which
// means that we always must cache transformed nodes.
DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
if (I != LegalizedNodes.end()) return I->second;
SDNode* Node = Op.getNode();
// Legalize the operands
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(LegalizeOp(Node->getOperand(i)));
SDValue Result =
SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
return TranslateLegalizeResults(Op, Result);
Changed = true;
return LegalizeOp(ExpandLoad(Op));
}
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore())
switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
Changed = true;
return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
}
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
++J)
HasVectorValue |= J->isVector();
if (!HasVectorValue)
return TranslateLegalizeResults(Op, Result);
EVT QueryType;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
case ISD::FPOWI:
case ISD::FPOW:
case ISD::FLOG:
case ISD::FLOG2:
case ISD::FLOG10:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
case ISD::FFLOOR:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
QueryType = Node->getOperand(0).getValueType();
break;
}
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
switch (Op.getOpcode()) {
default:
// "Promote" the operation by bitcasting
Result = PromoteVectorOp(Op);
Changed = true;
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// "Promote" the operation by extending the operand.
Result = PromoteVectorOpINT_TO_FP(Op);
Changed = true;
break;
}
break;
case TargetLowering::Legal: break;
case TargetLowering::Custom: {
SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
if (Tmp1.getNode()) {
Result = Tmp1;
break;
}
// FALL THROUGH
}
case TargetLowering::Expand:
if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
Result = ExpandSEXTINREG(Op);
else if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT)
Result = ExpandSELECT(Op);
else if (Node->getOpcode() == ISD::UINT_TO_FP)
Result = ExpandUINT_TO_FLOAT(Op);
else if (Node->getOpcode() == ISD::FNEG)
Result = ExpandFNEG(Op);
else if (Node->getOpcode() == ISD::SETCC)
Result = UnrollVSETCC(Op);
else
Result = DAG.UnrollVectorOp(Op.getNode());
break;
}
// Make sure that the generated code is itself legal.
if (Result != Op) {
Result = LegalizeOp(Result);
Changed = true;
}
// Note that LegalizeOp may be reentered even from single-use nodes, which
// means that we must always cache transformed nodes.
AddLegalizedOperand(Op, Result);
return Result;
}
SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
// Vector "promotion" is basically just bitcasting and doing the operation
// in a different type. For example, x86 promotes ISD::AND on v2i32 to
// v1i64.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
SDLoc dl(Op);
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
EVT VT = Op.getOperand(0).getValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
// Normal getTypeToPromoteTo() doesn't work here, as that will promote
// by widening the vector w/ the same element width and twice the number
// of elements. We want the other way around, the same number of elements,
// each twice the width.
//
// Increase the bitwidth of the element to the next pow-of-two
// (which is greater than 8 bits).
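//
// For example (an illustrative sketch, not tied to any particular target):
// a v2i16 SINT_TO_FP producing v2f32 is rewritten as
//   v2i16 --SIGN_EXTEND--> v2i32, then v2i32 SINT_TO_FP v2f32,
// keeping the element count but doubling the element width.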
unsigned NumElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
// Build a new vector type and check if it is legal.
MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
SDLoc dl(Op);
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
ISD::SIGN_EXTEND;
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
Operands.size());
}
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDLoc dl(Op);
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = LD->getExtensionType();
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
// When the elements in a vector are not byte-addressable, we cannot directly
// load each element by advancing a pointer, which can only address bytes.
// Instead, we load all significant words, mask bits off, and concatenate
// them to form each element. Finally, they are extended to destination
// scalar type to build the destination vector.
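// For example (illustrative): for i3 elements packed in memory, whole
// pointer-sized words are loaded, and element Idx is recovered as
// (Word >> BitOffset) & 0b111, OR'ing in bits from the next word when an
// element straddles a word boundary, before being extended.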
EVT WideVT = TLI.getPointerTy();
assert(WideVT.isRound() &&
"Could not handle the sophisticated case when the widest integer is"
" not power of 2.");
assert(WideVT.bitsGE(SrcEltVT) &&
"Type is not legalized?");
unsigned WideBytes = WideVT.getStoreSize();
unsigned Offset = 0;
unsigned RemainingBytes = SrcVT.getStoreSize();
SmallVector<SDValue, 8> LoadVals;
while (RemainingBytes > 0) {
SDValue ScalarLoad;
unsigned LoadBytes = WideBytes;
if (RemainingBytes >= LoadBytes) {
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment(),
LD->getTBAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
LoadBytes >>= 1; // Reduce the load size by half.
LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
}
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment(),
LD->getTBAAInfo());
}
RemainingBytes -= LoadBytes;
Offset += LoadBytes;
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(LoadBytes, BasePTR.getValueType()));
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
// Extract bits, pack and extend/trunc them into destination type.
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
unsigned BitOffset = 0;
unsigned WideIdx = 0;
unsigned WideBits = WideVT.getSizeInBits();
for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
SDValue Lo, Hi, ShAmt;
if (BitOffset < WideBits) {
ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
}
BitOffset += SrcEltBits;
if (BitOffset >= WideBits) {
WideIdx++;
Offset -= WideBits;
if (Offset > 0) {
ShAmt = DAG.getConstant(SrcEltBits - Offset,
TLI.getShiftAmountTy(WideVT));
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
}
}
if (Hi.getNode())
Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
switch (ExtType) {
default: llvm_unreachable("Unknown extended-load op!");
case ISD::EXTLOAD:
Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
break;
case ISD::ZEXTLOAD:
Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
break;
case ISD::SEXTLOAD:
ShAmt = DAG.getConstant(WideBits - SrcEltBits,
TLI.getShiftAmountTy(WideVT));
Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
break;
}
Vals.push_back(Lo);
}
} else {
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
for (unsigned Idx=0; Idx<NumElem; Idx++) {
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
Op.getNode()->getValueType(0).getScalarType(),
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment(), LD->getTBAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&LoadChains[0], LoadChains.size());
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
Op.getNode()->getValueType(0), &Vals[0], Vals.size());
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
return (Op.getResNo() ? NewChain : Value);
}
SDValue VectorLegalizer::ExpandStore(SDValue Op) {
SDLoc dl(Op);
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
SDValue Chain = ST->getChain();
SDValue BasePTR = ST->getBasePtr();
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
const MDNode *TBAAInfo = ST->getTBAAInfo();
unsigned NumElem = StVT.getVectorNumElements();
// The type of the data we want to save
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
// Cast floats into integers
unsigned ScalarSize = MemSclVT.getSizeInBits();
// Round odd types to the next pow of two.
if (!isPowerOf2_32(ScalarSize))
ScalarSize = NextPowerOf2(ScalarSize);
// Store Stride in bytes
unsigned Stride = ScalarSize/8;
// Extract each of the elements from the original vector
// and save them into memory individually.
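// For example (illustrative): a truncating store of v4f32 to v4f16 in
// memory becomes four EXTRACT_VECTOR_ELT + scalar truncating stores at
// byte offsets 0, 2, 4 and 6 (Stride == 2).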
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy()));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
isVolatile, isNonTemporal, Alignment, TBAAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
Stores.push_back(Store);
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&Stores[0], Stores.size());
AddLegalizedOperand(Op, TF);
return TF;
}
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
// using XOR, AND and OR. The selector bit is broadcast to every lane.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue Mask = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
unsigned NumElem = VT.getVectorNumElements();
// If we can't even use the basic vector operations
// AND, OR and XOR, we will have to scalarize the op.
// Note that the operation may be 'promoted', meaning it is
// 'bitcasted' to another type, which is handled separately.
// Also, we need to be able to construct a splat vector using BUILD_VECTOR.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
// Generate a mask operand.
EVT MaskTy = VT.changeVectorElementTypeToInteger();
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
Mask = DAG.getSelect(DL, BitTy, Mask,
DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
DAG.getConstant(0, BitTy));
// Broadcast the mask so that the entire vector is all ones or all zeros.
SmallVector<SDValue, 8> Ops(NumElem, Mask);
Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
// the mask is a vector of integers.
Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
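// The blend itself is computed bitwise:
//   Val = (Op1 & Mask) | (Op2 & ~Mask)
// which selects Op1 in lanes where Mask is all ones and Op2 in lanes where
// it is all zeros.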
SDValue AllOnes = DAG.getConstant(
APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
EVT VT = Op.getValueType();
// Make sure that the SRA and SHL instructions are available.
if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
SDLoc DL(Op);
EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
unsigned BW = VT.getScalarType().getSizeInBits();
unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
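// For example (illustrative values, BW = 32, OrigBW = 8): shifting left by
// 24 moves the i8 sign bit into bit 31, and the arithmetic shift right by
// 24 replicates it downward: 0x000000ff -> 0xff000000 -> 0xffffffff.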
SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
Op = Op.getOperand(0);
Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
SDLoc DL(Op);
SDValue Mask = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
EVT VT = Mask.getValueType();
// If we can't even use the basic vector operations
// AND, OR and XOR, we will have to scalarize the op.
// Note that the operation may be 'promoted', meaning it is
// 'bitcasted' to another type, which is handled separately.
// This operation also isn't safe with AND, OR and XOR when the boolean
// type is 0/1, as we need an all-ones vector constant to mask with.
// FIXME: Sign extend 1 to all ones if that's legal on the target.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getBooleanContents(true) !=
TargetLowering::ZeroOrNegativeOneBooleanContent)
return DAG.UnrollVectorOp(Op.getNode());
// If the mask and the type are different sizes, unroll the vector op. This
// can occur when getSetCCResultType returns something that is different in
// size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits())
return DAG.UnrollVectorOp(Op.getNode());
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
// the mask is a vector of integers.
Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
SDValue AllOnes = DAG.getConstant(
APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
SDLoc DL(Op);
// Make sure that the SINT_TO_FP and SRL instructions are available.
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
EVT SVT = VT.getScalarType();
assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
"Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
unsigned BW = SVT.getSizeInBits();
SDValue HalfWord = DAG.getConstant(BW/2, VT);
// Constants to clear the upper part of the word.
// Notice that we can also use SHL+SHR, but using a constant is slightly
// faster on x86.
uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
// Two to the power of half-word-size.
SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
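// Sketch with illustrative 32-bit lanes (BW == 32): for x = 0x80000000,
//   HI = x >> 16 = 0x8000, LO = x & 0xffff = 0,
// so the result is (fp)0x8000 * 65536.0 + (fp)0 = 2147483648.0, where a
// plain signed convert of x would have produced -2147483648.0.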
// HI gets the upper half shifted down; LO keeps the lower half (upper bits
// masked off).
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
// Convert HI and LO to floats, then scale the HI part back up to its
// original magnitude (multiply by 2^(BW/2)).
SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
// Add the two halves
return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
}
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
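// Sketch of the fold: (-0.0 - x) flips exactly the sign bit of any
// non-NaN x (including zero: -0.0 - 0.0 == -0.0), matching IEEE-754
// negation, so FSUB from -0.0 implements FNEG when FSUB is available.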
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
Zero, Op.getOperand(0));
}
return DAG.UnrollVectorOp(Op.getNode());
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
SDLoc dl(Op);
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
DAG.getConstant(i, TLI.getVectorIdxTy()));
SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
DAG.getConstant(i, TLI.getVectorIdxTy()));
Ops[i] = DAG.getNode(ISD::SETCC, dl,
TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
DAG.getConstant(APInt::getAllOnesValue
(EltVT.getSizeInBits()), EltVT),
DAG.getConstant(0, EltVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
}
}
bool SelectionDAG::LegalizeVectors() {
return VectorLegalizer(*this).Run();
}
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (revision 265924)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (revision 265925)
@@ -1,907 +1,912 @@
//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAG class, which is a base class used by
// scheduling implementation classes.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
// This allows the latency-based scheduler to notice high-latency instructions
// without a target itinerary. The choice of number here has more to do with
// balancing scheduler heuristics than with the actual machine latency.
static cl::opt<int> HighLatencyCycles(
"sched-high-latency-cycles", cl::Hidden, cl::init(10),
cl::desc("Roughly estimate the number of cycles that 'long latency'"
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
: ScheduleDAG(mf), BB(0), DAG(0),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
BB = bb;
DAG = dag;
// Clear the scheduler's SUnit DAG.
ScheduleDAG::clearDAG();
Sequence.clear();
// Invoke the target's selection of scheduler.
Schedule();
}
/// newSUnit - Creates a new SUnit and returns a pointer to it.
///
SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
const SUnit *Addr = 0;
if (!SUnits.empty())
Addr = &SUnits[0];
#endif
SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
assert((Addr == 0 || Addr == &SUnits[0]) &&
"SUnits std::vector reallocated on the fly!");
SUnits.back().OrigNode = &SUnits.back();
SUnit *SU = &SUnits.back();
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
if (!N ||
(N->isMachineOpcode() &&
N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
SU->SchedulingPref = Sched::None;
else
SU->SchedulingPref = TLI.getSchedulingPreference(N);
return SU;
}
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = newSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
SU->isCall = Old->isCall;
SU->isCallOp = Old->isCallOp;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
SU->isScheduleHigh = Old->isScheduleHigh;
SU->isScheduleLow = Old->isScheduleLow;
SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
}
/// CheckForPhysRegDependency - Check if the dependency between def and use of
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
return;
unsigned ResNo = User->getOperand(2).getResNo();
if (Def->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() &&
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
Cost = RC->getCopyCost();
}
}
}
// Helper for AddGlue to clone node operands.
static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
SmallVectorImpl<EVT> &VTs,
SDValue ExtraOper = SDValue()) {
SmallVector<SDValue, 4> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
if (ExtraOper.getNode())
Ops.push_back(ExtraOper);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
MachineSDNode::mmo_iterator Begin = 0, End = 0;
MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
// Store memory references.
if (MN) {
Begin = MN->memoperands_begin();
End = MN->memoperands_end();
}
DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
// Reset the memory references
if (MN)
MN->setMemRefs(Begin, End);
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
SDNode *GlueDestNode = Glue.getNode();
// Don't add glue from a node to itself.
if (GlueDestNode == N) return false;
// Don't add a glue operand to something that already uses glue.
if (GlueDestNode &&
N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
return false;
}
// Don't add glue to something that already has a glue value.
if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
VTs.push_back(N->getValueType(I));
if (AddGlue)
VTs.push_back(MVT::Glue);
CloneNodeWithValues(N, DAG, VTs, Glue);
return true;
}
// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
// node even though simply shrinking the value list would be sufficient.
static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
!N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
"expected an unused glue value");
SmallVector<EVT, 4> VTs;
for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
VTs.push_back(N->getValueType(I));
CloneNodeWithValues(N, DAG, VTs);
}
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
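/// For example, two loads from [base] and [base+4] hanging off the same
/// chain can be glued so that the scheduler keeps them adjacent and emits
/// them in increasing-address order.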
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
SDNode *Chain = 0;
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
Chain = Node->getOperand(NumOps-1).getNode();
if (!Chain)
return;
// Look for other loads of the same chain. Find loads that are loading from
// the same base pointer and different offsets.
SmallPtrSet<SDNode*, 16> Visited;
SmallVector<int64_t, 4> Offsets;
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
bool Cluster = false;
SDNode *Base = Node;
+ // This algorithm requires a reasonably low use count before finding a match
+ // to avoid uselessly blowing up compile time in large blocks.
+ unsigned UseCount = 0;
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
- I != E; ++I) {
+ I != E && UseCount < 100; ++I, ++UseCount) {
SDNode *User = *I;
if (User == Node || !Visited.insert(User))
continue;
int64_t Offset1, Offset2;
if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
Offset1 == Offset2)
// FIXME: Should be OK if the addresses are identical. But earlier
// optimizations really should have eliminated one of the loads.
continue;
if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
Offsets.push_back(Offset1);
O2SMap.insert(std::make_pair(Offset2, User));
Offsets.push_back(Offset2);
if (Offset2 < Offset1)
Base = User;
Cluster = true;
+ // Reset UseCount to allow more matches.
+ UseCount = 0;
}
if (!Cluster)
return;
// Sort them in increasing order.
std::sort(Offsets.begin(), Offsets.end());
// Check if the loads are close enough.
SmallVector<SDNode*, 4> Loads;
unsigned NumLoads = 0;
int64_t BaseOff = Offsets[0];
SDNode *BaseLoad = O2SMap[BaseOff];
Loads.push_back(BaseLoad);
for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
int64_t Offset = Offsets[i];
SDNode *Load = O2SMap[Offset];
if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
break; // Stop right here. Ignore loads that are further away.
Loads.push_back(Load);
++NumLoads;
}
if (NumLoads == 0)
return;
// Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensures they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
SDValue InGlue = SDValue(0, 0);
if (AddGlue(Lead, InGlue, true, DAG))
InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
bool OutGlue = I < E - 1;
SDNode *Load = Loads[I];
// If AddGlue fails, we could leave an unused glue value. This should not
// cause any problems; RemoveUnusedGlue cleans it up below.
if (AddGlue(Load, InGlue, OutGlue, DAG)) {
if (OutGlue)
InGlue = SDValue(Load, Load->getNumValues() - 1);
++LoadsClustered;
}
else if (!OutGlue && InGlue.getNode())
RemoveUnusedGlue(InGlue.getNode(), DAG);
}
}
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
E = DAG->allnodes_end(); NI != E; ++NI) {
SDNode *Node = &*NI;
if (!Node || !Node->isMachineOpcode())
continue;
unsigned Opc = Node->getMachineOpcode();
const MCInstrDesc &MCID = TII->get(Opc);
if (MCID.mayLoad())
// Cluster loads from "near" addresses into combined SUnits.
ClusterNeighboringLoads(Node);
}
}
void ScheduleDAGSDNodes::BuildSchedUnits() {
// During scheduling, the NodeId field of SDNode is used to map SDNodes
// to their associated SUnits by holding SUnits table indices. A value
// of -1 means the SDNode does not yet have an associated SUnit.
unsigned NumNodes = 0;
for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
E = DAG->allnodes_end(); NI != E; ++NI) {
NI->setNodeId(-1);
++NumNodes;
}
// Reserve entries in the vector for each of the SUnits we are creating. This
// ensures that reallocation of the vector won't happen, so SUnit*'s won't get
// invalidated.
// FIXME: Multiply by 2 because we may clone nodes during scheduling.
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
SmallVector<SUnit*, 8> CallSUnits;
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
if (Visited.insert(NI->getOperand(i).getNode()))
Worklist.push_back(NI->getOperand(i).getNode());
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
SUnit *NodeSUnit = newSUnit(NI);
// See if anything is glued to this node; if so, add it to the glued
// nodes. Nodes can have at most one glue input and one glue output. Glue
// is required to be the last operand and result of a node.
// Scan up to find glued preds.
SDNode *N = NI;
while (N->getNumOperands() &&
N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
N = N->getOperand(N->getNumOperands()-1).getNode();
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
NodeSUnit->isCall = true;
}
// Scan down to find any glued succs.
N = NI;
while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
SDValue GlueVal(N, N->getNumValues()-1);
// There are either zero or one users of the Glue result.
bool HasGlueUse = false;
for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
UI != E; ++UI)
if (GlueVal.isOperandOf(*UI)) {
HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
N = *UI;
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
NodeSUnit->isCall = true;
break;
}
if (!HasGlueUse) break;
}
if (NodeSUnit->isCall)
CallSUnits.push_back(NodeSUnit);
// Schedule zero-latency TokenFactor below any nodes that may increase the
// schedule height. Otherwise, ancestors of the TokenFactor may appear to
// have false stalls.
if (NI->getOpcode() == ISD::TokenFactor)
NodeSUnit->isScheduleLow = true;
// If there are glue operands involved, N is now the bottom-most node
// of the sequence of nodes that are glued together.
// Update the SUnit.
NodeSUnit->setNode(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
computeLatency(NodeSUnit);
}
// Find all call operands.
while (!CallSUnits.empty()) {
SUnit *SU = CallSUnits.pop_back_val();
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->getOpcode() != ISD::CopyToReg)
continue;
SDNode *SrcN = SUNode->getOperand(2).getNode();
if (isPassiveNode(SrcN)) continue; // Not scheduled.
SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
SrcSU->isCallOp = true;
}
}
}
void ScheduleDAGSDNodes::AddSchedEdges() {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
SUnit *SU = &SUnits[su];
SDNode *MainNode = SU->getNode();
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
const MCInstrDesc &MCID = TII->get(Opc);
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
SU->isTwoAddress = true;
break;
}
}
if (MCID.isCommutable())
SU->isCommutable = true;
}
// Find all predecessors and successors of the group.
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
SU->hasPhysRegClobbers = true;
unsigned NumUsed = InstrEmitter::CountResults(N);
while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
--NumUsed; // Skip over unused values at the end.
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
SU->hasPhysRegDefs = true;
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
if (isPassiveNode(OpN)) continue; // Not scheduled.
SUnit *OpSU = &SUnits[OpN->getNodeId()];
assert(OpSU && "Node has no SUnit!");
if (OpSU == SU) continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0;
int Cost = 1;
// Determine if this is a physical register dependency.
CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((PhysReg == 0 || !isChain) &&
"Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
// emits a copy from the physical register to a virtual register unless
// it requires a cross class copy (cost < 0). That means we are only
// treating "expensive to copy" register dependency as physical register
// dependency. This may change in the future though.
if (Cost >= 0 && !StressSched)
PhysReg = 0;
// If this is a ctrl dep, latency is 1.
unsigned OpLatency = isChain ? 1 : OpSU->Latency;
// Special-case TokenFactor chains as zero-latency.
if(isChain && OpN->getOpcode() == ISD::TokenFactor)
OpLatency = 0;
SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
: SDep(OpSU, SDep::Data, PhysReg);
Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
computeOperandLatency(OpN, N, i, Dep);
ST.adjustSchedDependency(OpSU, SU, Dep);
}
if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
// a single use, so to keep pressure balanced we reduce the defs.
//
// We can't tell (without more book-keeping) if this results from
// glued nodes or duplicate operands. As long as we don't reduce
// NumRegDefsLeft to zero, we handle the common cases well.
--OpSU->NumRegDefsLeft;
}
}
}
}
}
/// BuildSchedGraph - Build the SUnit graph from the selection DAG that we
/// are given as input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
// Populate the SUnits array.
BuildSchedUnits();
// Compute all the scheduling dependencies between nodes.
AddSchedEdges();
}
// Initialize NumNodeDefs for the current Node's opcode.
void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
// Check for phys reg copy.
if (!Node)
return;
if (!Node->isMachineOpcode()) {
if (Node->getOpcode() == ISD::CopyFromReg)
NodeNumDefs = 1;
else
NodeNumDefs = 0;
return;
}
unsigned POpc = Node->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF) {
// No register need be allocated for this.
NodeNumDefs = 0;
return;
}
unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
// Some instructions define regs that are not represented in the selection DAG
// (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
DefIdx = 0;
}
// Construct a RegDefIter for this SUnit and find the first valid value.
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
const ScheduleDAGSDNodes *SD)
: SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
InitNodeNumDefs();
Advance();
}
// Advance to the next valid value defined by the SUnit.
void ScheduleDAGSDNodes::RegDefIter::Advance() {
for (;Node;) { // Visit all glued nodes.
for (;DefIdx < NodeNumDefs; ++DefIdx) {
if (!Node->hasAnyUseOfValue(DefIdx))
continue;
ValueType = Node->getSimpleValueType(DefIdx);
++DefIdx;
return; // Found a normal regdef.
}
Node = Node->getGluedNode();
if (Node == NULL) {
return; // No values left to visit.
}
InitNodeNumDefs();
}
}
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
++SU->NumRegDefsLeft;
}
}
void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
// (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
// whenever node latency is nonzero.
if (N && N->getOpcode() == ISD::TokenFactor) {
SU->Latency = 0;
return;
}
// Check to see if the scheduler cares about latencies.
if (forceUnitLatencies()) {
SU->Latency = 1;
return;
}
if (!InstrItins || InstrItins->isEmpty()) {
if (N && N->isMachineOpcode() &&
TII->isHighLatencyDef(N->getMachineOpcode()))
SU->Latency = HighLatencyCycles;
else
SU->Latency = 1;
return;
}
// Compute the latency for the node. We use the sum of the latencies for
// all nodes glued together into this SUnit.
SU->Latency = 0;
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
if (N->isMachineOpcode())
SU->Latency += TII->getInstrLatency(InstrItins, N);
}
void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const{
// Check to see if the scheduler cares about latencies.
if (forceUnitLatencies())
return;
if (dep.getKind() != SDep::Data)
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
if (Use->isMachineOpcode())
// Adjust the use operand index by num of defs.
OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so as not to penalize the def.
// FIXME: need target specific adjustment here?
Latency = (Latency > 1) ? Latency - 1 : 1;
}
if (Latency >= 0)
dep.setLatency(Latency);
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
}
SU->getNode()->dump(DAG);
dbgs() << "\n";
SmallVector<SDNode *, 4> GluedNodes;
for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
dbgs() << " ";
GluedNodes.back()->dump(DAG);
dbgs() << "\n";
GluedNodes.pop_back();
}
#endif
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
SU->dump(this);
else
dbgs() << "**** NOOP ****\n";
}
}
#endif
#ifndef NDEBUG
/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
/// their state is consistent with the nodes listed in Sequence.
///
void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
unsigned Noops = 0;
for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
if (!Sequence[i])
++Noops;
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
}
#endif // NDEBUG
/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
if (!N->getHasDebugValue())
return;
// Opportunistically insert immediate dbg_value uses, i.e. those with source
// order number right after N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
if (!Order || DVOrder == ++Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
BB->insert(InsertPos, DbgMI);
}
DVs[i]->setIsInvalidated();
}
}
}
// ProcessSourceNode - Process nodes with source order numbers. These are added
// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
static void
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
DenseMap<SDValue, unsigned> &VRBaseMap,
SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
SmallSet<unsigned, 8> &Seen) {
unsigned Order = N->getIROrder();
if (!Order || !Seen.insert(Order)) {
// Process any valid SDDbgValues even if node does not have any order
// assigned.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
return;
}
MachineBasicBlock *BB = Emitter.getBlock();
if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
// Fast-isel may have inserted some instructions, in which case the
// BB->back().isPHI() test will not fire when we want it to.
prior(Emitter.getInsertPos())->isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
}
Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
void ScheduleDAGSDNodes::
EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
MachineBasicBlock::iterator InsertPos) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
if (I->getSUnit()->CopyDstRC) {
// Copy to physical register.
DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
// Find the destination physical register.
unsigned Reg = 0;
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) {
if (II->isCtrl()) continue; // ignore chain preds
if (II->getReg()) {
Reg = II->getReg();
break;
}
}
BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
.addReg(VRI->second);
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
(void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
.addReg(I->getReg());
}
break;
}
}
/// EmitSchedule - Emit the machine code in scheduled order. Return the new
/// InsertPos and MachineBasicBlock that contains this insertion
/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
/// not necessarily refer to the returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
SmallSet<unsigned, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
// If this is the first BB, emit byval parameter dbg_value's.
if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
for (; PDI != PDE; ++PDI) {
MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
if (DbgMI)
BB->insert(InsertPos, DbgMI);
}
}
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
SUnit *SU = Sequence[i];
if (!SU) {
// Null SUnit* is a noop.
TII->insertNoop(*Emitter.getBlock(), InsertPos);
continue;
}
// For pre-regalloc scheduling, create instructions corresponding to the
// SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
continue;
}
SmallVector<SDNode *, 4> GluedNodes;
for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
GluedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
Seen);
}
// Insert all the dbg_values which have not already been inserted in source
// order sequence.
if (HasDbg) {
MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
// Sort the source order instructions and use the order to insert debug
// values.
std::sort(Orders.begin(), Orders.end(), less_first());
SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
// Now emit the rest according to source order.
unsigned LastOrder = 0;
for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
unsigned Order = Orders[i].first;
MachineInstr *MI = Orders[i].second;
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
for (; DI != DE &&
(*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
if ((*DI)->isInvalidated())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
if (DbgMI) {
if (!LastOrder)
// Insert to start of the BB (after PHIs).
BB->insert(BBBegin, DbgMI);
else {
// Insert at the instruction, which may be in a different
// block, if the block was split by a custom inserter.
MachineBasicBlock::iterator Pos = MI;
MI->getParent()->insert(Pos, DbgMI);
}
}
}
LastOrder = Order;
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
SmallVector<MachineInstr*, 8> DbgMIs;
while (DI != DE) {
if (!(*DI)->isInvalidated())
if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
DbgMIs.push_back(DbgMI);
++DI;
}
MachineBasicBlock *InsertBB = Emitter.getBlock();
MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
}
InsertPos = Emitter.getInsertPos();
return Emitter.getBlock();
}
/// Return the basic block label.
std::string ScheduleDAGSDNodes::getDAGName() const {
return "sunit-dag." + BB->getFullName();
}
Index: head/contrib/llvm/lib/MC/MCAsmInfo.cpp
===================================================================
--- head/contrib/llvm/lib/MC/MCAsmInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/MC/MCAsmInfo.cpp (revision 265925)
@@ -1,140 +1,142 @@
//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines target asm properties related to what form asm statements
// should take.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Dwarf.h"
#include <cctype>
#include <cstring>
using namespace llvm;
MCAsmInfo::MCAsmInfo() {
PointerSize = 4;
CalleeSaveStackSlotSize = 4;
IsLittleEndian = true;
StackGrowsUp = false;
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
MinInstAlignment = 1;
DollarIsPC = false;
SeparatorString = ";";
CommentColumn = 40;
CommentString = "#";
LabelSuffix = ":";
DebugLabelSuffix = ":";
GlobalPrefix = "";
PrivateGlobalPrefix = ".";
LinkerPrivateGlobalPrefix = "";
InlineAsmStart = "APP";
InlineAsmEnd = "NO_APP";
Code16Directive = ".code16";
Code32Directive = ".code32";
Code64Directive = ".code64";
AssemblerDialect = 0;
AllowAtInName = false;
UseDataRegionDirectives = false;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t";
Data8bitsDirective = "\t.byte\t";
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
Data64bitsDirective = "\t.quad\t";
SunStyleELFSectionSwitchSyntax = false;
UsesELFSectionDirectiveForBSS = false;
AlignDirective = "\t.align\t";
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
GPRel64Directive = 0;
GPRel32Directive = 0;
GlobalDirective = "\t.globl\t";
HasSetDirective = true;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = true;
LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
HasDotTypeDotSizeDirective = true;
HasSingleParameterDotFile = true;
HasIdentDirective = false;
HasNoDeadStrip = false;
WeakRefDirective = 0;
- WeakDefDirective = 0;
- LinkOnceDirective = 0;
+ HasWeakDefDirective = false;
+ HasWeakDefCanBeHiddenDirective = false;
+ HasLinkOnceDirective = false;
HiddenVisibilityAttr = MCSA_Hidden;
HiddenDeclarationVisibilityAttr = MCSA_Hidden;
ProtectedVisibilityAttr = MCSA_Protected;
HasLEB128 = false;
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
DwarfUsesRelocationsAcrossSections = true;
+ DwarfFDESymbolsUseAbsDiff = false;
DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
NeedsDwarfSectionOffsetDirective = false;
}
MCAsmInfo::~MCAsmInfo() {
}
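// A ULEB128 byte carries 7 value bits plus a continuation bit, so the
// encoded size is the number of 7-bit groups required; e.g.
// getULEB128Size(0x7f) == 1, while getULEB128Size(0x80) == 2.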
unsigned MCAsmInfo::getULEB128Size(uint64_t Value) {
unsigned Size = 0;
do {
Value >>= 7;
Size += sizeof(int8_t);
} while (Value);
return Size;
}
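// SLEB128 must keep emitting bytes until the remaining value equals the
// sign extension and the sign bit of the last byte agrees with it; e.g.
// -1 encodes in one byte (0x7f), while +64 needs two (0xc0 0x00).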
unsigned MCAsmInfo::getSLEB128Size(int64_t Value) {
unsigned Size = 0;
int Sign = Value >> (8 * sizeof(Value) - 1);
bool IsMore;
do {
unsigned Byte = Value & 0x7f;
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
Size += sizeof(int8_t);
} while (IsMore);
return Size;
}
const MCExpr *
MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const {
return getExprForFDESymbol(Sym, Encoding, Streamer);
}
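// For pc-relative encodings, the FDE symbol is lowered to the difference
// "Sym - here": a temporary label is emitted at the current location and
// the expression Sym - PCSym is returned; e.g. a
// DW_EH_PE_pcrel | DW_EH_PE_sdata4 symbol becomes a 4-byte signed offset
// from the point of emission.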
const MCExpr *
MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const {
if (!(Encoding & dwarf::DW_EH_PE_pcrel))
return MCSymbolRefExpr::Create(Sym, Streamer.getContext());
MCContext &Context = Streamer.getContext();
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
MCSymbol *PCSym = Context.CreateTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
return MCBinaryExpr::CreateSub(Res, PC, Context);
}
Index: head/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
===================================================================
--- head/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp (revision 265924)
+++ head/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp (revision 265925)
@@ -1,52 +1,52 @@
//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines target asm properties related to what form asm statements
// should take in general on COFF-based targets
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoCOFF.h"
using namespace llvm;
void MCAsmInfoCOFF::anchor() { }
MCAsmInfoCOFF::MCAsmInfoCOFF() {
GlobalPrefix = "_";
// MinGW 4.5 and later support .comm with log2 alignment, but .lcomm uses byte
// alignment.
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
HasDotTypeDotSizeDirective = false;
HasSingleParameterDotFile = false;
PrivateGlobalPrefix = "L"; // Prefix for private global symbols
WeakRefDirective = "\t.weak\t";
- LinkOnceDirective = "\t.linkonce discard\n";
+ HasLinkOnceDirective = true;
// Doesn't support visibility:
HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid;
ProtectedVisibilityAttr = MCSA_Invalid;
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
SupportsDebugInformation = true;
HasMicrosoftFastStdCallMangling = true;
NeedsDwarfSectionOffsetDirective = true;
}
void MCAsmInfoMicrosoft::anchor() { }
MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
}
void MCAsmInfoGNUCOFF::anchor() { }
MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
}
Index: head/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
===================================================================
--- head/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp (revision 265924)
+++ head/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp (revision 265925)
@@ -1,62 +1,63 @@
//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines target asm properties related to what form asm statements
// should take in general on Darwin-based targets
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
void MCAsmInfoDarwin::anchor() { }
MCAsmInfoDarwin::MCAsmInfoDarwin() {
// Common settings for all Darwin targets.
// Syntax:
GlobalPrefix = "_";
PrivateGlobalPrefix = "L";
LinkerPrivateGlobalPrefix = "l";
HasSingleParameterDotFile = false;
HasSubsectionsViaSymbols = true;
AlignmentIsInBytes = false;
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
InlineAsmStart = " InlineAsm Start";
InlineAsmEnd = " InlineAsm End";
// Directives:
- WeakDefDirective = "\t.weak_definition ";
+ HasWeakDefDirective = true;
+ HasWeakDefCanBeHiddenDirective = true;
WeakRefDirective = "\t.weak_reference ";
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
HasStaticCtorDtorReferenceInStaticMode = true;
// FIXME: Darwin 10 and newer don't need this.
LinkerRequiresNonEmptyDwarfLines = true;
// FIXME: Change this once MC is the system assembler.
HasAggressiveSymbolFolding = false;
HiddenVisibilityAttr = MCSA_PrivateExtern;
HiddenDeclarationVisibilityAttr = MCSA_Invalid;
// Doesn't support protected visibility.
ProtectedVisibilityAttr = MCSA_Invalid;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
DwarfUsesRelocationsAcrossSections = false;
}
Index: head/contrib/llvm/lib/MC/MCDwarf.cpp
===================================================================
--- head/contrib/llvm/lib/MC/MCDwarf.cpp (revision 265924)
+++ head/contrib/llvm/lib/MC/MCDwarf.cpp (revision 265925)
@@ -1,1520 +1,1524 @@
//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
// DWARF2_LINE_MIN_INSN_LENGTH).
#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
// The maximum address skip amount that can be encoded with a special op.
#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
// First special line opcode - leave room for the standard opcodes.
// Note: If you want to change this, you'll have to update the
// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().
#define DWARF2_LINE_OPCODE_BASE 13
// Minimum line offset in a special line info. opcode. This value
// was chosen to give a reasonable range of values.
#define DWARF2_LINE_BASE -5
// Range of line offsets in a special line info. opcode.
#define DWARF2_LINE_RANGE 14
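// Worked example (illustrative, not part of the original source): a special
// opcode encodes a line delta and an address delta in a single byte as
//   opcode = (LineDelta - DWARF2_LINE_BASE) +
//            (DWARF2_LINE_RANGE * AddrDelta) + DWARF2_LINE_OPCODE_BASE
// so, with the constants above, LineDelta = +2 and AddrDelta = 3 give
//   (2 - (-5)) + 14 * 3 + 13 = 62.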
static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment();
if (MinInsnLength == 1)
return AddrDelta;
if (AddrDelta % MinInsnLength != 0) {
// TODO: report this error, but really only once.
;
}
return AddrDelta / MinInsnLength;
}
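// Worked example (illustrative): on a hypothetical target whose
// MinInstAlignment is 4 (a fixed-width instruction encoding), ScaleAddrDelta
// turns a byte delta of 12 into 3 units of the minimum instruction length.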
//
// This is called when an instruction is assembled into the specified section
// and, if there is information from the last .loc directive that has yet to
// have a line entry made for it, one is made.
//
void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
if (!MCOS->getContext().getDwarfLocSeen())
return;
// Create a symbol in the current section for use in the line entry.
MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
// Set the value of the symbol to use for the MCLineEntry.
MCOS->EmitLabel(LineSym);
// Get the current .loc info saved in the context.
const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
// Create a (local) line entry with the symbol and the current .loc info.
MCLineEntry LineEntry(LineSym, DwarfLoc);
// Clear DwarfLocSeen, noting that the current .loc info has now been used.
MCOS->getContext().ClearDwarfLocSeen();
// Get the MCLineSection for this section; if one does not exist for this
// section, create it.
const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
MCOS->getContext().getMCLineSections();
MCLineSection *LineSection = MCLineSections.lookup(Section);
if (!LineSection) {
// Create a new MCLineSection. This will be deleted after the dwarf line
// table is created using it by iterating through the MCLineSections
// DenseMap.
LineSection = new MCLineSection;
// Save a pointer to the new LineSection into the MCLineSections DenseMap.
MCOS->getContext().addMCLineSection(Section, LineSection);
}
// Add the line entry to this section's entries.
LineSection->addLineEntry(LineEntry,
MCOS->getContext().getDwarfCompileUnitID());
}
//
// This helper routine returns an expression of End - Start - IntVal.
//
static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
const MCSymbol &Start,
const MCSymbol &End,
int IntVal) {
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Res =
MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
const MCExpr *RHS =
MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
const MCExpr *Res1 =
MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
const MCExpr *Res2 =
MCConstantExpr::Create(IntVal, MCOS.getContext());
const MCExpr *Res3 =
MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
return Res3;
}
//
// This emits the Dwarf line table for the specified section from the entries
// in the LineSection.
//
static inline void EmitDwarfLineTable(MCStreamer *MCOS,
const MCSection *Section,
const MCLineSection *LineSection,
unsigned CUID) {
// This LineSection does not contain any LineEntry for the given Compile Unit.
if (!LineSection->containEntriesForID(CUID))
return;
unsigned FileNum = 1;
unsigned LastLine = 1;
unsigned Column = 0;
unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
unsigned Isa = 0;
MCSymbol *LastLabel = NULL;
// Loop through each MCLineEntry and encode the dwarf line number table.
for (MCLineSection::const_iterator
it = LineSection->getMCLineEntries(CUID).begin(),
ie = LineSection->getMCLineEntries(CUID).end(); it != ie; ++it) {
if (FileNum != it->getFileNum()) {
FileNum = it->getFileNum();
MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
MCOS->EmitULEB128IntValue(FileNum);
}
if (Column != it->getColumn()) {
Column = it->getColumn();
MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
MCOS->EmitULEB128IntValue(Column);
}
if (Isa != it->getIsa()) {
Isa = it->getIsa();
MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
MCOS->EmitULEB128IntValue(Isa);
}
if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
Flags = it->getFlags();
MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
}
if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK)
MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END)
MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine;
MCSymbol *Label = it->getLabel();
// At this point we want to emit/create the sequence to encode the delta in
// line numbers and the increment of the address between the previous Label
// and the current Label.
const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
asmInfo->getPointerSize());
LastLine = it->getLine();
LastLabel = Label;
}
// Emit a DW_LNE_end_sequence for the end of the section.
// Using the pointer Section, create a temporary label at the end of the
// section, and use that label and LastLabel to compute the address delta;
// INT64_MAX is used as the line delta, which is the signal that this is
// actually a DW_LNE_end_sequence.
// Switch to the section to be able to create a symbol at its end.
// TODO: keep track of the last subsection so that this symbol appears in the
// correct place.
MCOS->SwitchSection(Section);
MCContext &context = MCOS->getContext();
// Create a symbol at the end of the section.
MCSymbol *SectionEnd = context.CreateTempSymbol();
// Set the value of the symbol, as we are at the end of the section.
MCOS->EmitLabel(SectionEnd);
// Switch back to the dwarf line section.
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
asmInfo->getPointerSize());
}
//
// This emits the Dwarf file and the line tables.
//
const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
MCContext &context = MCOS->getContext();
// Switch to the section where the table will be emitted into.
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
const DenseMap<unsigned, MCSymbol *> &MCLineTableSymbols =
MCOS->getContext().getMCLineTableSymbols();
// CUID and MCLineTableSymbols are set in DwarfDebug; when DwarfDebug does
// not exist, CUID will be 0 and MCLineTableSymbols will be empty.
// Handle Compile Unit 0; its line table start symbol is the section symbol.
const MCSymbol *LineStartSym = EmitCU(MCOS, 0);
// Handle the rest of the Compile Units.
for (unsigned Is = 1, Ie = MCLineTableSymbols.size(); Is < Ie; Is++)
EmitCU(MCOS, Is);
// Now delete the MCLineSections that were created in MCLineEntry::Make()
// and used to emit the line table.
const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
MCOS->getContext().getMCLineSections();
for (DenseMap<const MCSection *, MCLineSection *>::const_iterator it =
MCLineSections.begin(), ie = MCLineSections.end(); it != ie;
++it)
delete it->second;
return LineStartSym;
}
const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
MCContext &context = MCOS->getContext();
// Create a symbol at the beginning of the line table.
MCSymbol *LineStartSym = MCOS->getContext().getMCLineTableSymbol(CUID);
if (!LineStartSym)
LineStartSym = context.CreateTempSymbol();
// Set the value of the symbol, as we are at the start of the line table.
MCOS->EmitLabel(LineStartSym);
// Create a symbol for the end of the section (to be set when we get there).
MCSymbol *LineEndSym = context.CreateTempSymbol();
// The first 4 bytes are the total length of the information for this
// compilation unit (not including these 4 bytes for the length).
MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
4);
// The next 2 bytes are the version, which is Dwarf 2.
MCOS->EmitIntValue(2, 2);
// Create a symbol for the end of the prologue (to be set when we get there).
MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
// The next 4 bytes are the length of the prologue: from the start of the
// section to the end of the prologue, not including the 4 bytes for the
// total length, the 2 bytes for the version, and these 4 bytes for the
// length of the prologue.
MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
(4 + 2 + 4)), 4);
// Parameters of the state machine are next.
MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1);
MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1);
// Standard opcode lengths
MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy
MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc
MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line
MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file
MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column
MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_isa
// Put out the directory and file tables.
// First the directory table.
const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs(CUID);
for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName
MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
}
MCOS->EmitIntValue(0, 1); // Terminate the directory list
// Second the file table.
const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles(CUID);
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName
MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
// the Directory num
MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
MCOS->EmitIntValue(0, 1); // filesize (always 0)
}
MCOS->EmitIntValue(0, 1); // Terminate the file list
// This is the end of the prologue, so set the value of the symbol at the
// end of the prologue (that was used in a previous expression).
MCOS->EmitLabel(ProEndSym);
// Put out the line tables.
const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
MCOS->getContext().getMCLineSections();
const std::vector<const MCSection *> &MCLineSectionOrder =
MCOS->getContext().getMCLineSectionOrder();
for (std::vector<const MCSection*>::const_iterator it =
MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
++it) {
const MCSection *Sec = *it;
const MCLineSection *Line = MCLineSections.lookup(Sec);
EmitDwarfLineTable(MCOS, Sec, Line, CUID);
}
if (MCOS->getContext().getAsmInfo()->getLinkerRequiresNonEmptyDwarfLines()
&& MCLineSectionOrder.begin() == MCLineSectionOrder.end()) {
// The darwin9 linker has a bug (see PR8715). For 32-bit architectures
// it requires:
// total_length >= prologue_length + 10
// We are 4 bytes short, since we have total_length = 51 and
// prologue_length = 45
// The regular end_sequence should be sufficient.
MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
}
// This is the end of the section, so set the value of the symbol at the end
// of this section (that was used in a previous expression).
MCOS->EmitLabel(LineEndSym);
return LineStartSym;
}
/// Utility function to emit the encoding to a streamer.
void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
uint64_t AddrDelta) {
MCContext &Context = MCOS->getContext();
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OS);
MCOS->EmitBytes(OS.str());
}
/// Utility function to encode a Dwarf pair of LineDelta and AddrDelta.
void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
uint64_t AddrDelta, raw_ostream &OS) {
uint64_t Temp, Opcode;
bool NeedCopy = false;
// Scale the address delta by the minimum instruction length.
AddrDelta = ScaleAddrDelta(Context, AddrDelta);
// A LineDelta of INT64_MAX is a signal that this is actually a
// DW_LNE_end_sequence. We cannot use special opcodes here, since we want the
// end_sequence to emit the matrix entry.
if (LineDelta == INT64_MAX) {
if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
OS << char(dwarf::DW_LNS_const_add_pc);
else {
OS << char(dwarf::DW_LNS_advance_pc);
encodeULEB128(AddrDelta, OS);
}
OS << char(dwarf::DW_LNS_extended_op);
OS << char(1);
OS << char(dwarf::DW_LNE_end_sequence);
return;
}
// Bias the line delta by the base.
Temp = LineDelta - DWARF2_LINE_BASE;
// If the line increment is out of range of a special opcode, we must encode
// it with DW_LNS_advance_line.
if (Temp >= DWARF2_LINE_RANGE) {
OS << char(dwarf::DW_LNS_advance_line);
encodeSLEB128(LineDelta, OS);
LineDelta = 0;
Temp = 0 - DWARF2_LINE_BASE;
NeedCopy = true;
}
// Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
if (LineDelta == 0 && AddrDelta == 0) {
OS << char(dwarf::DW_LNS_copy);
return;
}
// Bias the opcode by the special opcode base.
Temp += DWARF2_LINE_OPCODE_BASE;
// Avoid overflow when addr_delta is large.
if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
// Try using a special opcode.
Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
if (Opcode <= 255) {
OS << char(Opcode);
return;
}
// Try using DW_LNS_const_add_pc followed by special op.
Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
if (Opcode <= 255) {
OS << char(dwarf::DW_LNS_const_add_pc);
OS << char(Opcode);
return;
}
}
// Otherwise use DW_LNS_advance_pc.
OS << char(dwarf::DW_LNS_advance_pc);
encodeULEB128(AddrDelta, OS);
if (NeedCopy)
OS << char(dwarf::DW_LNS_copy);
else
OS << char(Temp);
}
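// Worked example (illustrative) of the encoding above: LineDelta = +2 with a
// scaled AddrDelta = 3 is in range, so the single special opcode byte 62 is
// emitted; LineDelta = +20 is out of range, so DW_LNS_advance_line(+20) is
// emitted first, the line delta is rebased to 0, and the remaining pair is
// emitted as the special opcode (0 - (-5)) + 13 + 3 * 14 = 60.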
void MCDwarfFile::print(raw_ostream &OS) const {
OS << '"' << getName() << '"';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCDwarfFile::dump() const {
print(dbgs());
}
#endif
// Utility function to write a tuple for .debug_abbrev.
static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
MCOS->EmitULEB128IntValue(Name);
MCOS->EmitULEB128IntValue(Form);
}
// When generating dwarf for assembly source files this emits the data for
// the .debug_abbrev section, which contains three DIE abbreviations.
static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCContext &context = MCOS->getContext();
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
// DW_TAG_compile_unit DIE abbrev (1).
MCOS->EmitULEB128IntValue(1);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
if (!context.getCompilationDir().empty())
EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
if (!DwarfDebugFlags.empty())
EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string);
EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string);
EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2);
EmitAbbrev(MCOS, 0, 0);
// DW_TAG_label DIE abbrev (2).
MCOS->EmitULEB128IntValue(2);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_label);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4);
EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4);
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag);
EmitAbbrev(MCOS, 0, 0);
// DW_TAG_unspecified_parameters DIE abbrev (3).
MCOS->EmitULEB128IntValue(3);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1);
EmitAbbrev(MCOS, 0, 0);
// Terminate the abbreviations for this compilation unit.
MCOS->EmitIntValue(0, 1);
}
// When generating dwarf for assembly source files this emits the data for the
// .debug_aranges section, which contains a header and a table of pairs of
// PointerSize'ed values for the address and size of section(s) with line table
// entries (just the default .text in our case) and a terminating pair of zeros.
static void EmitGenDwarfAranges(MCStreamer *MCOS,
const MCSymbol *InfoSectionSymbol) {
MCContext &context = MCOS->getContext();
// Create a symbol at the end of the section that we are creating the dwarf
// debugging info for, to use later in here as part of the expression to
// calculate the size of the section for the table.
MCOS->SwitchSection(context.getGenDwarfSection());
MCSymbol *SectionEndSym = context.CreateTempSymbol();
MCOS->EmitLabel(SectionEndSym);
context.setGenDwarfSectionEndSym(SectionEndSym);
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
// This will be the length of the .debug_aranges section, first account for
// the size of each item in the header (see below where we emit these items).
int Length = 4 + 2 + 4 + 1 + 1;
// Figure the padding after the header before the table of address and size
// pairs whose values are PointerSize'ed.
const MCAsmInfo *asmInfo = context.getAsmInfo();
int AddrSize = asmInfo->getPointerSize();
int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
if (Pad == 2 * AddrSize)
Pad = 0;
Length += Pad;
// Add the size of the pair of PointerSize'ed values for the address and size
// of the one default .text section we have in the table.
Length += 2 * AddrSize;
// And the pair of terminating zeros.
Length += 2 * AddrSize;
// Emit the header for this section.
// The 4 byte length not including the 4 byte value for the length.
MCOS->EmitIntValue(Length - 4, 4);
// The 2 byte version, which is 2.
MCOS->EmitIntValue(2, 2);
// The 4 byte offset to the compile unit in the .debug_info from the start
// of the .debug_info.
if (InfoSectionSymbol)
MCOS->EmitSymbolValue(InfoSectionSymbol, 4);
else
MCOS->EmitIntValue(0, 4);
// The 1 byte size of an address.
MCOS->EmitIntValue(AddrSize, 1);
// The 1 byte size of a segment descriptor, we use a value of zero.
MCOS->EmitIntValue(0, 1);
// Align the header with the padding if needed, before we put out the table.
for(int i = 0; i < Pad; i++)
MCOS->EmitIntValue(0, 1);
// Now emit the table of pairs of PointerSize'ed values for the section(s)
// address and size, in our case just the one default .text section.
const MCExpr *Addr = MCSymbolRefExpr::Create(
context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
*context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
MCOS->EmitAbsValue(Addr, AddrSize);
MCOS->EmitAbsValue(Size, AddrSize);
// And finally the pair of terminating zeros.
MCOS->EmitIntValue(0, AddrSize);
MCOS->EmitIntValue(0, AddrSize);
}
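// Worked example (illustrative) of the padding math above: with an 8 byte
// pointer size the table must be 16 byte aligned, so the 12 byte header gives
// Pad = 16 - (12 & 15) = 4 and Length = 12 + 4 + 16 + 16 = 48, of which
// 48 - 4 = 44 is emitted in the length field.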
// When generating dwarf for assembly source files this emits the data for the
// .debug_info section, which contains three parts: the header, the
// compile_unit DIE, and a list of label DIEs.
static void EmitGenDwarfInfo(MCStreamer *MCOS,
const MCSymbol *AbbrevSectionSymbol,
const MCSymbol *LineSectionSymbol) {
MCContext &context = MCOS->getContext();
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
// Create symbols at the start and end of this section, used in here for the
// expression to calculate the length in the header.
MCSymbol *InfoStart = context.CreateTempSymbol();
MCOS->EmitLabel(InfoStart);
MCSymbol *InfoEnd = context.CreateTempSymbol();
// First part: the header.
// The 4 byte total length of the information for this compilation unit, not
// including these 4 bytes.
const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
MCOS->EmitAbsValue(Length, 4);
// The 2 byte DWARF version, which is 2.
MCOS->EmitIntValue(2, 2);
// The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev;
// it is at the start of that section, so this is zero.
if (AbbrevSectionSymbol) {
MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4);
} else {
MCOS->EmitIntValue(0, 4);
}
const MCAsmInfo *asmInfo = context.getAsmInfo();
int AddrSize = asmInfo->getPointerSize();
// The 1 byte size of an address.
MCOS->EmitIntValue(AddrSize, 1);
// Second part: the compile_unit DIE.
// The DW_TAG_compile_unit DIE abbrev (1).
MCOS->EmitULEB128IntValue(1);
// DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line section,
// which is at the start of that section so this is zero.
if (LineSectionSymbol) {
MCOS->EmitSymbolValue(LineSectionSymbol, 4);
} else {
MCOS->EmitIntValue(0, 4);
}
// AT_low_pc, the first address of the default .text section.
const MCExpr *Start = MCSymbolRefExpr::Create(
context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
MCOS->EmitAbsValue(Start, AddrSize);
// AT_high_pc, the last address of the default .text section.
const MCExpr *End = MCSymbolRefExpr::Create(
context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
MCOS->EmitAbsValue(End, AddrSize);
// AT_name, the name of the source file. Reconstruct from the first directory
// and file table entries.
const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs();
if (MCDwarfDirs.size() > 0) {
MCOS->EmitBytes(MCDwarfDirs[0]);
MCOS->EmitBytes("/");
}
const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles();
MCOS->EmitBytes(MCDwarfFiles[1]->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_comp_dir, the working directory the assembly was done in.
if (!context.getCompilationDir().empty()) {
MCOS->EmitBytes(context.getCompilationDir());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
}
// AT_APPLE_flags, the command line arguments of the assembler tool.
StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
if (!DwarfDebugFlags.empty()){
MCOS->EmitBytes(DwarfDebugFlags);
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
}
// AT_producer, the version of the assembler tool.
StringRef DwarfDebugProducer = context.getDwarfDebugProducer();
if (!DwarfDebugProducer.empty()){
MCOS->EmitBytes(DwarfDebugProducer);
}
else {
MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "));
MCOS->EmitBytes(StringRef(PACKAGE_VERSION));
MCOS->EmitBytes(StringRef(")"));
}
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_language, a 2 byte value. We use DW_LANG_Mips_Assembler, as the dwarf2
// draft has no standard code for assembler.
MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2);
// Third part: the list of label DIEs.
// Loop on saved info for dwarf labels and create the DIEs for them.
const std::vector<const MCGenDwarfLabelEntry *> &Entries =
MCOS->getContext().getMCGenDwarfLabelEntries();
for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
Entries.begin(), ie = Entries.end(); it != ie;
++it) {
const MCGenDwarfLabelEntry *Entry = *it;
// The DW_TAG_label DIE abbrev (2).
MCOS->EmitULEB128IntValue(2);
// AT_name of the label, without any leading underbar.
MCOS->EmitBytes(Entry->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_decl_file, index into the file table.
MCOS->EmitIntValue(Entry->getFileNumber(), 4);
// AT_decl_line, source line number.
MCOS->EmitIntValue(Entry->getLineNumber(), 4);
// AT_low_pc, start address of the label.
const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
MCSymbolRefExpr::VK_None, context);
MCOS->EmitAbsValue(AT_low_pc, AddrSize);
// DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype.
MCOS->EmitIntValue(0, 1);
// The DW_TAG_unspecified_parameters DIE abbrev (3).
MCOS->EmitULEB128IntValue(3);
// Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIEs.
MCOS->EmitIntValue(0, 1);
}
// Deallocate the MCGenDwarfLabelEntry objects that saved away the info
// for the dwarf labels.
for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
Entries.begin(), ie = Entries.end(); it != ie;
++it) {
const MCGenDwarfLabelEntry *Entry = *it;
delete Entry;
}
// Add the NULL DIE terminating the Compile Unit DIE's children.
MCOS->EmitIntValue(0, 1);
// Now set the value of the symbol at the end of the info section.
MCOS->EmitLabel(InfoEnd);
}
//
// When generating dwarf for assembly source files this emits the Dwarf
// sections.
//
void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
// Create the dwarf sections in this order (.debug_line already created).
MCContext &context = MCOS->getContext();
const MCAsmInfo *AsmInfo = context.getAsmInfo();
bool CreateDwarfSectionSymbols =
AsmInfo->doesDwarfUseRelocationsAcrossSections();
if (!CreateDwarfSectionSymbols)
LineSectionSymbol = NULL;
MCSymbol *AbbrevSectionSymbol = NULL;
MCSymbol *InfoSectionSymbol = NULL;
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
if (CreateDwarfSectionSymbols) {
InfoSectionSymbol = context.CreateTempSymbol();
MCOS->EmitLabel(InfoSectionSymbol);
}
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
if (CreateDwarfSectionSymbols) {
AbbrevSectionSymbol = context.CreateTempSymbol();
MCOS->EmitLabel(AbbrevSectionSymbol);
}
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
// If there are no line table entries then do not emit any section contents.
if (context.getMCLineSections().empty())
return;
// Output the data for .debug_aranges section.
EmitGenDwarfAranges(MCOS, InfoSectionSymbol);
// Output the data for .debug_abbrev section.
EmitGenDwarfAbbrev(MCOS);
// Output the data for .debug_info section.
EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol);
}
//
// When generating dwarf for assembly source files this is called when the
// symbol for a label is created. If this symbol is not a temporary and is in
// the section that dwarf is being generated for, save the info needed to
// create a dwarf label.
//
void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
SourceMgr &SrcMgr, SMLoc &Loc) {
// We won't create dwarf labels for temporary symbols or symbols not in
// the default text section.
if (Symbol->isTemporary())
return;
MCContext &context = MCOS->getContext();
if (context.getGenDwarfSection() != MCOS->getCurrentSection().first)
return;
// The dwarf label's name does not have the symbol name's leading
// underbar if any.
StringRef Name = Symbol->getName();
if (Name.startswith("_"))
Name = Name.substr(1, Name.size()-1);
// Get the dwarf file number to be used for the dwarf label.
unsigned FileNumber = context.getGenDwarfFileNumber();
// Finding the line number is the expensive part, which is why we don't just
// pass it in: for some symbols we won't create a dwarf label at all.
int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
// We create a temporary symbol for use for the AT_high_pc and AT_low_pc
// values so that they don't have things like an ARM thumb bit from the
// original symbol. So when used they won't get a low bit set after
// relocation.
MCSymbol *Label = context.CreateTempSymbol();
MCOS->EmitLabel(Label);
// Create an entry for the info and add it to the other entries.
MCGenDwarfLabelEntry *Entry =
new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label);
MCOS->getContext().addMCGenDwarfLabelEntry(Entry);
}
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
const MCAsmInfo *asmInfo = context.getAsmInfo();
int size = asmInfo->getCalleeSaveStackSlotSize();
if (asmInfo->isStackGrowthDirectionUp())
return size;
else
return -size;
}
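// Worked example (illustrative): with 8 byte callee-save stack slots on a
// downward-growing stack, getDataAlignmentFactor returns -8, so a slot at
// offset -16 from the CFA is stored in DW_CFA_offset as the factored value 2.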
static unsigned getSizeForEncoding(MCStreamer &streamer,
unsigned symbolEncoding) {
MCContext &context = streamer.getContext();
unsigned format = symbolEncoding & 0x0f;
switch (format) {
default: llvm_unreachable("Unknown Encoding");
case dwarf::DW_EH_PE_absptr:
case dwarf::DW_EH_PE_signed:
return context.getAsmInfo()->getPointerSize();
case dwarf::DW_EH_PE_udata2:
case dwarf::DW_EH_PE_sdata2:
return 2;
case dwarf::DW_EH_PE_udata4:
case dwarf::DW_EH_PE_sdata4:
return 4;
case dwarf::DW_EH_PE_udata8:
case dwarf::DW_EH_PE_sdata8:
return 8;
}
}
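// Worked example (illustrative): an encoding of
// DW_EH_PE_pcrel | DW_EH_PE_sdata4 has the low nibble DW_EH_PE_sdata4, so
// getSizeForEncoding returns 4; the pcrel bits affect how the value is
// computed, not how many bytes it occupies.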
-static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
- unsigned symbolEncoding, const char *comment = 0) {
+static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding, bool isEH,
+ const char *comment = 0) {
MCContext &context = streamer.getContext();
const MCAsmInfo *asmInfo = context.getAsmInfo();
const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol,
symbolEncoding,
streamer);
unsigned size = getSizeForEncoding(streamer, symbolEncoding);
if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
- streamer.EmitAbsValue(v, size);
+ if (asmInfo->doDwarfFDESymbolsUseAbsDiff() && isEH)
+ streamer.EmitAbsValue(v, size);
+ else
+ streamer.EmitValue(v, size);
}
static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
unsigned symbolEncoding) {
MCContext &context = streamer.getContext();
const MCAsmInfo *asmInfo = context.getAsmInfo();
const MCExpr *v = asmInfo->getExprForPersonalitySymbol(&symbol,
symbolEncoding,
streamer);
unsigned size = getSizeForEncoding(streamer, symbolEncoding);
streamer.EmitValue(v, size);
}
namespace {
class FrameEmitterImpl {
int CFAOffset;
int CIENum;
bool UsingCFI;
bool IsEH;
const MCSymbol *SectionStart;
public:
FrameEmitterImpl(bool usingCFI, bool isEH)
: CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
SectionStart(0) {}
void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
/// EmitCompactUnwind - Emit the unwind information in a compact way.
void EmitCompactUnwind(MCStreamer &streamer,
const MCDwarfFrameInfo &frame);
const MCSymbol &EmitCIE(MCStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
bool IsSignalFrame,
unsigned lsdaEncoding);
MCSymbol *EmitFDE(MCStreamer &streamer,
const MCSymbol &cieStart,
const MCDwarfFrameInfo &frame);
void EmitCFIInstructions(MCStreamer &streamer,
ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel);
void EmitCFIInstruction(MCStreamer &Streamer,
const MCCFIInstruction &Instr);
};
} // end anonymous namespace
static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
StringRef Prefix) {
if (Streamer.isVerboseAsm()) {
const char *EncStr;
switch (Encoding) {
default: EncStr = "<unknown encoding>"; break;
case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
case dwarf::DW_EH_PE_omit: EncStr = "omit"; break;
case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel"; break;
case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
EncStr = "pcrel udata4";
break;
case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
EncStr = "pcrel sdata4";
break;
case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
EncStr = "pcrel udata8";
break;
case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
EncStr = "screl sdata8";
break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
EncStr = "indirect pcrel udata4";
break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
EncStr = "indirect pcrel sdata4";
break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
EncStr = "indirect pcrel udata8";
break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
EncStr = "indirect pcrel sdata8";
break;
}
Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
}
Streamer.EmitIntValue(Encoding, 1);
}
void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
const MCCFIInstruction &Instr) {
int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
bool VerboseAsm = Streamer.isVerboseAsm();
switch (Instr.getOperation()) {
case MCCFIInstruction::OpRegister: {
unsigned Reg1 = Instr.getRegister();
unsigned Reg2 = Instr.getRegister2();
if (VerboseAsm) {
Streamer.AddComment("DW_CFA_register");
Streamer.AddComment(Twine("Reg1 ") + Twine(Reg1));
Streamer.AddComment(Twine("Reg2 ") + Twine(Reg2));
}
Streamer.EmitIntValue(dwarf::DW_CFA_register, 1);
Streamer.EmitULEB128IntValue(Reg1);
Streamer.EmitULEB128IntValue(Reg2);
return;
}
case MCCFIInstruction::OpWindowSave: {
Streamer.EmitIntValue(dwarf::DW_CFA_GNU_window_save, 1);
return;
}
case MCCFIInstruction::OpUndefined: {
unsigned Reg = Instr.getRegister();
if (VerboseAsm) {
Streamer.AddComment("DW_CFA_undefined");
Streamer.AddComment(Twine("Reg ") + Twine(Reg));
}
Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
Streamer.EmitULEB128IntValue(Reg);
return;
}
case MCCFIInstruction::OpAdjustCfaOffset:
case MCCFIInstruction::OpDefCfaOffset: {
const bool IsRelative =
Instr.getOperation() == MCCFIInstruction::OpAdjustCfaOffset;
if (VerboseAsm)
Streamer.AddComment("DW_CFA_def_cfa_offset");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
if (IsRelative)
CFAOffset += Instr.getOffset();
else
CFAOffset = -Instr.getOffset();
if (VerboseAsm)
Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
Streamer.EmitULEB128IntValue(CFAOffset);
return;
}
case MCCFIInstruction::OpDefCfa: {
if (VerboseAsm)
Streamer.AddComment("DW_CFA_def_cfa");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
if (VerboseAsm)
Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
Streamer.EmitULEB128IntValue(Instr.getRegister());
CFAOffset = -Instr.getOffset();
if (VerboseAsm)
Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
Streamer.EmitULEB128IntValue(CFAOffset);
return;
}
case MCCFIInstruction::OpDefCfaRegister: {
if (VerboseAsm)
Streamer.AddComment("DW_CFA_def_cfa_register");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
if (VerboseAsm)
Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
Streamer.EmitULEB128IntValue(Instr.getRegister());
return;
}
case MCCFIInstruction::OpOffset:
case MCCFIInstruction::OpRelOffset: {
const bool IsRelative =
Instr.getOperation() == MCCFIInstruction::OpRelOffset;
unsigned Reg = Instr.getRegister();
int Offset = Instr.getOffset();
if (IsRelative)
Offset -= CFAOffset;
Offset = Offset / dataAlignmentFactor;
if (Offset < 0) {
if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitSLEB128IntValue(Offset);
} else if (Reg < 64) {
if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
Twine(Reg) + ")");
Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitULEB128IntValue(Offset);
} else {
if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitULEB128IntValue(Offset);
}
return;
}
case MCCFIInstruction::OpRememberState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
case MCCFIInstruction::OpRestoreState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
case MCCFIInstruction::OpSameValue: {
unsigned Reg = Instr.getRegister();
if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
return;
}
case MCCFIInstruction::OpRestore: {
unsigned Reg = Instr.getRegister();
if (VerboseAsm) {
Streamer.AddComment("DW_CFA_restore");
Streamer.AddComment(Twine("Reg ") + Twine(Reg));
}
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
case MCCFIInstruction::OpEscape:
if (VerboseAsm) Streamer.AddComment("Escape bytes");
Streamer.EmitBytes(Instr.getValues());
return;
}
llvm_unreachable("Unhandled case in switch");
}
/// EmitCFIInstructions - Emit frame instructions to describe the layout of
/// the frame.
void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel) {
for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
const MCCFIInstruction &Instr = Instrs[i];
MCSymbol *Label = Instr.getLabel();
// Throw out move if the label is invalid.
if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
// Advance row if new location.
if (BaseLabel && Label) {
MCSymbol *ThisSym = Label;
if (ThisSym != BaseLabel) {
if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
BaseLabel = ThisSym;
}
}
EmitCFIInstruction(streamer, Instr);
}
}
/// EmitCompactUnwind - Emit the unwind information in a compact way.
void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
const MCDwarfFrameInfo &Frame) {
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
bool VerboseAsm = Streamer.isVerboseAsm();
// range-start range-length compact-unwind-enc personality-func lsda
// _foo LfooEnd-_foo 0x00000023 0 0
// _bar LbarEnd-_bar 0x00000025 __gxx_personality except_tab1
//
// .section __LD,__compact_unwind,regular,debug
//
// # compact unwind for _foo
// .quad _foo
// .set L1,LfooEnd-_foo
// .long L1
// .long 0x01010001
// .quad 0
// .quad 0
//
// # compact unwind for _bar
// .quad _bar
// .set L2,LbarEnd-_bar
// .long L2
// .long 0x01020011
// .quad __gxx_personality
// .quad except_tab1
uint32_t Encoding = Frame.CompactUnwindEncoding;
if (!Encoding) return;
bool DwarfEHFrameOnly = (Encoding == MOFI->getCompactUnwindDwarfEHFrameOnly());
// The encoding needs to know we have an LSDA.
if (!DwarfEHFrameOnly && Frame.Lsda)
Encoding |= 0x40000000;
// Range Start
unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI);
unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
if (VerboseAsm) Streamer.AddComment("Range Start");
Streamer.EmitSymbolValue(Frame.Function, Size);
// Range Length
const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
*Frame.End, 0);
if (VerboseAsm) Streamer.AddComment("Range Length");
Streamer.EmitAbsValue(Range, 4);
// Compact Encoding
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding: 0x" +
Twine::utohexstr(Encoding));
Streamer.EmitIntValue(Encoding, Size);
// Personality Function
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
if (VerboseAsm) Streamer.AddComment("Personality Function");
if (!DwarfEHFrameOnly && Frame.Personality)
Streamer.EmitSymbolValue(Frame.Personality, Size);
else
Streamer.EmitIntValue(0, Size); // No personality fn
// LSDA
Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
if (VerboseAsm) Streamer.AddComment("LSDA");
if (!DwarfEHFrameOnly && Frame.Lsda)
Streamer.EmitSymbolValue(Frame.Lsda, Size);
else
Streamer.EmitIntValue(0, Size); // No LSDA
}
const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
bool IsSignalFrame,
unsigned lsdaEncoding) {
MCContext &context = streamer.getContext();
const MCRegisterInfo *MRI = context.getRegisterInfo();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
bool verboseAsm = streamer.isVerboseAsm();
MCSymbol *sectionStart;
if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH)
sectionStart = context.CreateTempSymbol();
else
sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
streamer.EmitLabel(sectionStart);
CIENum++;
MCSymbol *sectionEnd = context.CreateTempSymbol();
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
*sectionEnd, 4);
if (verboseAsm) streamer.AddComment("CIE Length");
streamer.EmitAbsValue(Length, 4);
// CIE ID
unsigned CIE_ID = IsEH ? 0 : -1;
if (verboseAsm) streamer.AddComment("CIE ID Tag");
streamer.EmitIntValue(CIE_ID, 4);
// Version
if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
// Augmentation String
SmallString<8> Augmentation;
if (IsEH) {
if (verboseAsm) streamer.AddComment("CIE Augmentation");
Augmentation += "z";
if (personality)
Augmentation += "P";
if (lsda)
Augmentation += "L";
Augmentation += "R";
if (IsSignalFrame)
Augmentation += "S";
streamer.EmitBytes(Augmentation.str());
}
streamer.EmitIntValue(0, 1);
// Code Alignment Factor
if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
// Data Alignment Factor
if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
// Return Address Register
if (verboseAsm) streamer.AddComment("CIE Return Address Column");
streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true));
// Augmentation Data Length (optional)
unsigned augmentationLength = 0;
if (IsEH) {
if (personality) {
// Personality Encoding
augmentationLength += 1;
// Personality
augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
}
if (lsda)
augmentationLength += 1;
// Encoding of the FDE pointers
augmentationLength += 1;
if (verboseAsm) streamer.AddComment("Augmentation Size");
streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data (optional)
if (personality) {
// Personality Encoding
EmitEncodingByte(streamer, personalityEncoding,
"Personality Encoding");
// Personality
if (verboseAsm) streamer.AddComment("Personality");
EmitPersonality(streamer, *personality, personalityEncoding);
}
if (lsda)
EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
// Encoding of the FDE pointers
EmitEncodingByte(streamer, MOFI->getFDEEncoding(UsingCFI),
"FDE Encoding");
}
// Initial Instructions
const MCAsmInfo *MAI = context.getAsmInfo();
const std::vector<MCCFIInstruction> &Instructions =
MAI->getInitialFrameState();
EmitCFIInstructions(streamer, Instructions, NULL);
// Padding
streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
streamer.EmitLabel(sectionEnd);
return *sectionStart;
}
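// Worked example (illustrative): for an EH CIE with a personality routine, an
// LSDA, and a signal frame, the code above builds the augmentation string
// "zPLRS": 'z' for the augmentation data length, 'P' for the personality,
// 'L' for the LSDA encoding, 'R' for the FDE pointer encoding, and 'S' for
// the signal frame.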
MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
const MCSymbol &cieStart,
const MCDwarfFrameInfo &frame) {
MCContext &context = streamer.getContext();
MCSymbol *fdeStart = context.CreateTempSymbol();
MCSymbol *fdeEnd = context.CreateTempSymbol();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
bool verboseAsm = streamer.isVerboseAsm();
if (IsEH && frame.Function && !MOFI->isFunctionEHFrameSymbolPrivate()) {
MCSymbol *EHSym =
context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
streamer.EmitEHSymAttributes(frame.Function, EHSym);
streamer.EmitLabel(EHSym);
}
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
if (verboseAsm) streamer.AddComment("FDE Length");
streamer.EmitAbsValue(Length, 4);
streamer.EmitLabel(fdeStart);
// CIE Pointer
const MCAsmInfo *asmInfo = context.getAsmInfo();
if (IsEH) {
const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
0);
if (verboseAsm) streamer.AddComment("FDE CIE Offset");
streamer.EmitAbsValue(offset, 4);
} else if (!asmInfo->doesDwarfUseRelocationsAcrossSections()) {
const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
cieStart, 0);
streamer.EmitAbsValue(offset, 4);
} else {
streamer.EmitSymbolValue(&cieStart, 4);
}
// PC Begin
unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
: (unsigned)dwarf::DW_EH_PE_absptr;
unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
- EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location");
+ EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location");
// PC Range
const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
*frame.End, 0);
if (verboseAsm) streamer.AddComment("FDE address range");
streamer.EmitAbsValue(Range, PCSize);
if (IsEH) {
// Augmentation Data Length
unsigned augmentationLength = 0;
if (frame.Lsda)
augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
if (verboseAsm) streamer.AddComment("Augmentation size");
streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data
if (frame.Lsda)
- EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
- "Language Specific Data Area");
+ EmitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true,
+ "Language Specific Data Area");
}
// Call Frame Instructions
EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
// Padding
streamer.EmitValueToAlignment(PCSize);
return fdeEnd;
}
namespace {
struct CIEKey {
static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false); }
static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false); }
CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
unsigned LsdaEncoding_, bool IsSignalFrame_) :
Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_) {
}
const MCSymbol* Personality;
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
bool IsSignalFrame;
};
}
namespace llvm {
template <>
struct DenseMapInfo<CIEKey> {
static CIEKey getEmptyKey() {
return CIEKey::getEmptyKey();
}
static CIEKey getTombstoneKey() {
return CIEKey::getTombstoneKey();
}
static unsigned getHashValue(const CIEKey &Key) {
return static_cast<unsigned>(hash_combine(Key.Personality,
Key.PersonalityEncoding,
Key.LsdaEncoding,
Key.IsSignalFrame));
}
static bool isEqual(const CIEKey &LHS,
const CIEKey &RHS) {
return LHS.Personality == RHS.Personality &&
LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
LHS.LsdaEncoding == RHS.LsdaEncoding &&
LHS.IsSignalFrame == RHS.IsSignalFrame;
}
};
}
void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB,
bool UsingCFI, bool IsEH) {
Streamer.generateCompactUnwindEncodings(MAB);
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
FrameEmitterImpl Emitter(UsingCFI, IsEH);
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
if (IsEH && MOFI->getCompactUnwindSection()) {
bool SectionEmitted = false;
for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = FrameArray[i];
if (Frame.CompactUnwindEncoding == 0) continue;
if (!SectionEmitted) {
Streamer.SwitchSection(MOFI->getCompactUnwindSection());
Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize());
SectionEmitted = true;
}
Emitter.EmitCompactUnwind(Streamer, Frame);
}
}
const MCSection &Section =
IsEH ? *const_cast<MCObjectFileInfo*>(MOFI)->getEHFrameSection() :
*MOFI->getDwarfFrameSection();
Streamer.SwitchSection(&Section);
MCSymbol *SectionStart = Context.CreateTempSymbol();
Streamer.EmitLabel(SectionStart);
Emitter.setSectionStart(SectionStart);
MCSymbol *FDEEnd = NULL;
DenseMap<CIEKey, const MCSymbol*> CIEStarts;
const MCSymbol *DummyDebugKey = NULL;
for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = FrameArray[i];
CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
Frame.LsdaEncoding, Frame.IsSignalFrame);
const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
if (!CIEStart)
CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
Frame.PersonalityEncoding, Frame.Lsda,
Frame.IsSignalFrame,
Frame.LsdaEncoding);
FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
if (i != n - 1)
Streamer.EmitLabel(FDEEnd);
}
Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize());
if (FDEEnd)
Streamer.EmitLabel(FDEEnd);
}
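// Worked example (illustrative): two frames that share the same personality,
// personality encoding, LSDA encoding, and signal-frame flag map to the same
// CIEKey, so the CIEStarts lookup above emits one shared CIE and then one FDE
// per frame referring back to it.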
void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
uint64_t AddrDelta) {
MCContext &Context = Streamer.getContext();
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OS);
Streamer.EmitBytes(OS.str());
}
void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context,
uint64_t AddrDelta,
raw_ostream &OS) {
// Scale the address delta by the minimum instruction length.
AddrDelta = ScaleAddrDelta(Context, AddrDelta);
if (AddrDelta == 0) {
} else if (isUIntN(6, AddrDelta)) {
uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
OS << Opcode;
} else if (isUInt<8>(AddrDelta)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
OS << uint8_t(AddrDelta);
} else if (isUInt<16>(AddrDelta)) {
// FIXME: check what is the correct behavior on a big endian machine.
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
OS << uint8_t( AddrDelta & 0xff);
OS << uint8_t((AddrDelta >> 8) & 0xff);
} else {
// FIXME: check what is the correct behavior on a big endian machine.
assert(isUInt<32>(AddrDelta));
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
OS << uint8_t( AddrDelta & 0xff);
OS << uint8_t((AddrDelta >> 8) & 0xff);
OS << uint8_t((AddrDelta >> 16) & 0xff);
OS << uint8_t((AddrDelta >> 24) & 0xff);
}
}
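// Worked example (illustrative): a scaled delta of 0x2a fits in 6 bits and is
// folded into the opcode byte as DW_CFA_advance_loc | 0x2a; a delta of 0x1234
// takes DW_CFA_advance_loc2 and is written little-endian as the bytes
// 0x34, 0x12 (see the FIXME above about big-endian hosts).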
Index: head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
===================================================================
--- head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp (revision 265924)
+++ head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp (revision 265925)
@@ -1,4320 +1,4324 @@
//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the parser for assembly files.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <set>
#include <string>
#include <vector>
using namespace llvm;
static cl::opt<bool>
FatalAssemblerWarnings("fatal-assembler-warnings",
cl::desc("Consider warnings as error"));
MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
namespace {
/// \brief Helper types for tracking macro definitions.
typedef std::vector<AsmToken> MCAsmMacroArgument;
typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
typedef std::pair<StringRef, MCAsmMacroArgument> MCAsmMacroParameter;
typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
struct MCAsmMacro {
StringRef Name;
StringRef Body;
MCAsmMacroParameters Parameters;
public:
MCAsmMacro(StringRef N, StringRef B, const MCAsmMacroParameters &P) :
Name(N), Body(B), Parameters(P) {}
MCAsmMacro(const MCAsmMacro& Other)
: Name(Other.Name), Body(Other.Body), Parameters(Other.Parameters) {}
};
/// \brief Helper class for storing information about an active macro
/// instantiation.
struct MacroInstantiation {
/// The macro being instantiated.
const MCAsmMacro *TheMacro;
/// The macro instantiation with substitutions.
MemoryBuffer *Instantiation;
/// The location of the instantiation.
SMLoc InstantiationLoc;
/// The buffer where parsing should resume upon instantiation completion.
int ExitBuffer;
/// The location where parsing should resume upon instantiation completion.
SMLoc ExitLoc;
public:
MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, SMLoc EL,
MemoryBuffer *I);
};
struct ParseStatementInfo {
/// \brief The parsed operands from the last parsed statement.
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
/// \brief The opcode from the last parsed instruction.
unsigned Opcode;
/// \brief Was there an error parsing the inline assembly?
bool ParseError;
SmallVectorImpl<AsmRewrite> *AsmRewrites;
ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(0) {}
ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
: Opcode(~0), ParseError(false), AsmRewrites(rewrites) {}
~ParseStatementInfo() {
// Free any parsed operands.
for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
delete ParsedOperands[i];
ParsedOperands.clear();
}
};
/// \brief The concrete assembly parser instance.
class AsmParser : public MCAsmParser {
AsmParser(const AsmParser &) LLVM_DELETED_FUNCTION;
void operator=(const AsmParser &) LLVM_DELETED_FUNCTION;
private:
AsmLexer Lexer;
MCContext &Ctx;
MCStreamer &Out;
const MCAsmInfo &MAI;
SourceMgr &SrcMgr;
SourceMgr::DiagHandlerTy SavedDiagHandler;
void *SavedDiagContext;
MCAsmParserExtension *PlatformParser;
/// This is the current buffer index we're lexing from as managed by the
/// SourceMgr object.
int CurBuffer;
AsmCond TheCondState;
std::vector<AsmCond> TheCondStack;
/// \brief maps directive names to handler methods in parser
/// extensions. Extensions register themselves in this map by calling
/// addDirectiveHandler.
StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
/// \brief Map of currently defined macros.
StringMap<MCAsmMacro*> MacroMap;
/// \brief Stack of active macro instantiations.
std::vector<MacroInstantiation*> ActiveMacros;
/// \brief List of bodies of anonymous macros.
std::deque<MCAsmMacro> MacroLikeBodies;
/// Boolean tracking whether macro substitution is enabled.
unsigned MacrosEnabledFlag : 1;
/// Flag tracking whether any errors have been encountered.
unsigned HadError : 1;
/// The values from the last parsed cpp hash file line comment if any.
StringRef CppHashFilename;
int64_t CppHashLineNumber;
SMLoc CppHashLoc;
int CppHashBuf;
/// When generating dwarf for assembly source files we need to calculate the
/// logical line number based on the last parsed cpp hash file line comment
/// and current line. Since this is slow and messes up the SourceMgr's
/// cache we save the last info we queried with SrcMgr.FindLineNumber().
SMLoc LastQueryIDLoc;
int LastQueryBuffer;
unsigned LastQueryLine;
/// AssemblerDialect. ~0U means unset value; use the value provided by MAI.
unsigned AssemblerDialect;
/// \brief is Darwin compatibility enabled?
bool IsDarwin;
/// \brief Are we parsing ms-style inline assembly?
bool ParsingInlineAsm;
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
virtual ~AsmParser();
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
virtual void addDirectiveHandler(StringRef Directive,
ExtensionDirectiveHandler Handler) {
ExtensionDirectiveMap[Directive] = Handler;
}
public:
/// @name MCAsmParser Interface
/// {
virtual SourceMgr &getSourceManager() { return SrcMgr; }
virtual MCAsmLexer &getLexer() { return Lexer; }
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
virtual unsigned getAssemblerDialect() {
if (AssemblerDialect == ~0U)
return MAI.getAssemblerDialect();
else
return AssemblerDialect;
}
virtual void setAssemblerDialect(unsigned i) {
AssemblerDialect = i;
}
virtual bool Warning(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None);
virtual bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None);
virtual const AsmToken &Lex();
void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
bool isParsingInlineAsm() { return ParsingInlineAsm; }
bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
SmallVectorImpl<std::pair<void *,bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers,
const MCInstrInfo *MII,
const MCInstPrinter *IP,
MCAsmParserSemaCallback &SI);
bool parseExpression(const MCExpr *&Res);
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool parseAbsoluteExpression(int64_t &Res);
/// \brief Parse an identifier or string (as a quoted identifier)
/// and set \p Res to the identifier contents.
virtual bool parseIdentifier(StringRef &Res);
virtual void eatToEndOfStatement();
virtual void checkForValidSection();
/// }
private:
bool parseStatement(ParseStatementInfo &Info);
void eatToEndOfLine();
bool parseCppHashLineFilenameComment(const SMLoc &L);
void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
MCAsmMacroParameters Parameters);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
const MCAsmMacroParameters &Parameters,
const MCAsmMacroArguments &A,
const SMLoc &L);
/// \brief Are macros enabled in the parser?
bool areMacrosEnabled() {return MacrosEnabledFlag;}
/// \brief Control a flag in the parser that enables or disables macros.
void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;}
/// \brief Lookup a previously defined macro.
/// \param Name Macro name.
/// \returns Pointer to macro. NULL if no such macro was defined.
const MCAsmMacro* lookupMacro(StringRef Name);
/// \brief Define a new macro with the given name and information.
void defineMacro(StringRef Name, const MCAsmMacro& Macro);
/// \brief Undefine a macro. If no such macro was defined, it's a no-op.
void undefineMacro(StringRef Name);
/// \brief Are we inside a macro instantiation?
bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
/// \brief Handle entry to macro instantiation.
///
/// \param M The macro.
/// \param NameLoc Instantiation location.
bool handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc);
/// \brief Handle exit from macro instantiation.
void handleMacroExit();
/// \brief Extract AsmTokens for a macro argument. If the argument delimiter
/// is initially unknown, set it to AsmToken::Eof. It will be set to the
/// correct delimiter by the method.
bool parseMacroArgument(MCAsmMacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter);
/// \brief Parse all macro arguments for a given macro.
bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
void printMacroInstantiations();
void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
ArrayRef<SMRange> Ranges = None) const {
SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
/// \brief Enter the specified file. This returns true on failure.
bool enterIncludeFile(const std::string &Filename);
/// \brief Process the specified file for the .incbin directive.
/// This returns true on failure.
bool processIncbinFile(const std::string &Filename);
/// \brief Reset the current lexer position to that given by \p Loc. The
/// current token is not set; clients should ensure Lex() is called
/// subsequently.
///
/// \param InBuffer If not -1, should be the known buffer id that contains the
/// location.
void jumpToLoc(SMLoc Loc, int InBuffer=-1);
/// \brief Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
/// will be either the EndOfStatement or EOF.
virtual StringRef parseStringToEndOfStatement();
/// \brief Parse until the end of a statement or a comma is encountered,
/// return the contents from the current token up to the end or comma.
StringRef parseStringToComma();
bool parseAssignment(StringRef Name, bool allow_redef,
bool NoDeadStrip = false);
bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
// Generic (target and platform independent) directive parsing.
enum DirectiveKind {
DK_NO_DIRECTIVE, // Placeholder
DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT,
DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_SINGLE,
DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW,
DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR,
DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK,
DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL,
DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN,
DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE,
DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC,
DK_IF, DK_IFB, DK_IFNB, DK_IFC, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF,
DK_ELSEIF, DK_ELSE, DK_ENDIF,
DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA,
DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED,
DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE,
DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
DK_SLEB128, DK_ULEB128
};
/// \brief Maps directive name --> DirectiveKind enum, for
/// directives parsed by this class.
StringMap<DirectiveKind> DirectiveKindMap;
// ".ascii", ".asciz", ".string"
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ...
bool parseDirectiveRealValue(const fltSemantics &); // ".single", ...
bool parseDirectiveFill(); // ".fill"
bool parseDirectiveZero(); // ".zero"
// ".set", ".equ", ".equiv"
bool parseDirectiveSet(StringRef IDVal, bool allow_redef);
bool parseDirectiveOrg(); // ".org"
// ".align{,32}", ".p2align{,w,l}"
bool parseDirectiveAlign(bool IsPow2, unsigned ValueSize);
// ".file", ".line", ".loc", ".stabs"
bool parseDirectiveFile(SMLoc DirectiveLoc);
bool parseDirectiveLine();
bool parseDirectiveLoc();
bool parseDirectiveStabs();
// .cfi directives
bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
bool parseDirectiveCFIWindowSave();
bool parseDirectiveCFISections();
bool parseDirectiveCFIStartProc();
bool parseDirectiveCFIEndProc();
bool parseDirectiveCFIDefCfaOffset();
bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
bool parseDirectiveCFIAdjustCfaOffset();
bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
bool parseDirectiveCFIRememberState();
bool parseDirectiveCFIRestoreState();
bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
bool parseDirectiveCFIEscape();
bool parseDirectiveCFISignalFrame();
bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
// macro directives
bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
bool parseDirectiveEndMacro(StringRef Directive);
bool parseDirectiveMacro(SMLoc DirectiveLoc);
bool parseDirectiveMacrosOnOff(StringRef Directive);
// ".bundle_align_mode"
bool parseDirectiveBundleAlignMode();
// ".bundle_lock"
bool parseDirectiveBundleLock();
// ".bundle_unlock"
bool parseDirectiveBundleUnlock();
// ".space", ".skip"
bool parseDirectiveSpace(StringRef IDVal);
// .sleb128 (Signed=true) and .uleb128 (Signed=false)
bool parseDirectiveLEB128(bool Signed);
/// \brief Parse a directive like ".globl" which
/// accepts a single symbol (which should be a label or an external).
bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
bool parseDirectiveAbort(); // ".abort"
bool parseDirectiveInclude(); // ".include"
bool parseDirectiveIncbin(); // ".incbin"
bool parseDirectiveIf(SMLoc DirectiveLoc); // ".if"
// ".ifb" or ".ifnb", depending on ExpectBlank.
bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".ifc" or ".ifnc", depending on ExpectEqual.
bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual);
// ".ifdef" or ".ifndef", depending on expect_defined
bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
bool parseDirectiveElse(SMLoc DirectiveLoc); // ".else"
bool parseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
virtual bool parseEscapedString(std::string &Data);
const MCExpr *applyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant);
// Macro-like directives
MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS);
bool parseDirectiveRept(SMLoc DirectiveLoc); // ".rept"
bool parseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
bool parseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
bool parseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
// "_emit" or "__emit"
bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
size_t Len);
// "align"
bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
void initializeDirectiveKindMap();
};
}
namespace llvm {
extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
extern MCAsmParserExtension *createCOFFAsmParser();
}
enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
PlatformParser(0), CurBuffer(0), MacrosEnabledFlag(true),
CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false),
ParsingInlineAsm(false) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
// Set our own handler which calls the saved handler.
SrcMgr.setDiagHandler(DiagHandler, this);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
// Initialize the platform / file format parser.
//
// FIXME: This is a hack, we need to (majorly) cleanup how these objects are
// created.
if (_MAI.hasMicrosoftFastStdCallMangling()) {
PlatformParser = createCOFFAsmParser();
PlatformParser->Initialize(*this);
} else if (_MAI.hasSubsectionsViaSymbols()) {
PlatformParser = createDarwinAsmParser();
PlatformParser->Initialize(*this);
IsDarwin = true;
} else {
PlatformParser = createELFAsmParser();
PlatformParser->Initialize(*this);
}
initializeDirectiveKindMap();
}
AsmParser::~AsmParser() {
assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
// Destroy any macros.
for (StringMap<MCAsmMacro *>::iterator it = MacroMap.begin(),
ie = MacroMap.end();
it != ie; ++it)
delete it->getValue();
delete PlatformParser;
}
void AsmParser::printMacroInstantiations() {
// Print the active macro instantiation stack.
for (std::vector<MacroInstantiation *>::const_reverse_iterator
it = ActiveMacros.rbegin(),
ie = ActiveMacros.rend();
it != ie; ++it)
printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
"while in macro instantiation");
}
bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
if (FatalAssemblerWarnings)
return Error(L, Msg, Ranges);
printMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
printMacroInstantiations();
return false;
}
bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
HadError = true;
printMessage(L, SourceMgr::DK_Error, Msg, Ranges);
printMacroInstantiations();
return true;
}
bool AsmParser::enterIncludeFile(const std::string &Filename) {
std::string IncludedFile;
int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
if (NewBuf == -1)
return true;
CurBuffer = NewBuf;
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
return false;
}
/// Process the specified .incbin file by searching for it in the include
/// paths, then emitting the byte contents of the file to the streamer. This
/// returns true on failure.
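/// For example (a sketch), ".incbin \"data.bin\"" copies the raw bytes of
/// data.bin into the current section at the point of the directive.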
bool AsmParser::processIncbinFile(const std::string &Filename) {
std::string IncludedFile;
int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
if (NewBuf == -1)
return true;
// Pick up the bytes from the file and emit them.
getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer());
return false;
}
void AsmParser::jumpToLoc(SMLoc Loc, int InBuffer) {
if (InBuffer != -1) {
CurBuffer = InBuffer;
} else {
CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
}
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
}
const AsmToken &AsmParser::Lex() {
const AsmToken *tok = &Lexer.Lex();
if (tok->is(AsmToken::Eof)) {
// If this is the end of an included file, pop the parent file off the
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
jumpToLoc(ParentIncludeLoc);
tok = &Lexer.Lex();
}
}
if (tok->is(AsmToken::Error))
Error(Lexer.getErrLoc(), Lexer.getErr());
return *tok;
}
bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
if (!NoInitialTextSection)
Out.InitSections();
// Prime the lexer.
Lex();
HadError = false;
AsmCond StartingCondState = TheCondState;
// If we are generating dwarf for assembly source files save the initial text
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
getContext().setGenDwarfSection(getStreamer().getCurrentSection().first);
MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
getStreamer().EmitLabel(SectionStartSym);
getContext().setGenDwarfSectionStartSym(SectionStartSym);
getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
StringRef(),
getContext().getMainFileName());
}
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info;
if (!parseStatement(Info))
continue;
// We had an error, validate that one was emitted and recover by skipping to
// the next line.
assert(HadError && "Parse statement returned an error, but none emitted!");
eatToEndOfStatement();
}
if (TheCondState.TheCond != StartingCondState.TheCond ||
TheCondState.Ignore != StartingCondState.Ignore)
return TokError("unmatched .ifs or .elses");
// Check to see there are no empty DwarfFile slots.
const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
if (!MCDwarfFiles[i])
TokError("unassigned file number: " + Twine(i) + " for .file directives");
}
// Check to see that all assembler local symbols were actually defined.
// Targets that don't do subsections via symbols may not want this, though,
// so conservatively exclude them. Only do this if we're finalizing, though,
// as otherwise we won't necessarily have seen everything yet.
if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
const MCContext::SymbolTable &Symbols = getContext().getSymbols();
for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
e = Symbols.end();
i != e; ++i) {
MCSymbol *Sym = i->getValue();
// Variable symbols may not be marked as defined, so check those
// explicitly. If we know it's a variable, we have a definition for
// the purposes of this check.
if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
// FIXME: We would really like to refer back to where the symbol was
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
printMessage(
getLexer().getLoc(), SourceMgr::DK_Error,
"assembler local symbol '" + Sym->getName() + "' not defined");
}
}
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
Out.Finish();
return HadError;
}
void AsmParser::checkForValidSection() {
if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
TokError("expected section directive before assembly directive");
Out.InitToTextSection();
}
}
/// \brief Throw away the rest of the line; used for error recovery and for
/// directives that ignore the remainder of the statement.
void AsmParser::eatToEndOfStatement() {
while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
Lex();
// Eat EOL.
if (Lexer.is(AsmToken::EndOfStatement))
Lex();
}
StringRef AsmParser::parseStringToEndOfStatement() {
const char *Start = getTok().getLoc().getPointer();
while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
Lex();
const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
}
StringRef AsmParser::parseStringToComma() {
const char *Start = getTok().getLoc().getPointer();
while (Lexer.isNot(AsmToken::EndOfStatement) &&
Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof))
Lex();
const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
}
/// \brief Parse a paren expression and return it.
/// NOTE: This assumes the leading '(' has already been consumed.
///
/// parenexpr ::= expr)
///
bool AsmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
if (parseExpression(Res))
return true;
if (Lexer.isNot(AsmToken::RParen))
return TokError("expected ')' in parentheses expression");
EndLoc = Lexer.getTok().getEndLoc();
Lex();
return false;
}
/// \brief Parse a bracket expression and return it.
/// NOTE: This assumes the leading '[' has already been consumed.
///
/// bracketexpr ::= expr]
///
bool AsmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
if (parseExpression(Res))
return true;
if (Lexer.isNot(AsmToken::RBrac))
return TokError("expected ']' in brackets expression");
EndLoc = Lexer.getTok().getEndLoc();
Lex();
return false;
}
/// \brief Parse a primary expression and return it.
/// primaryexpr ::= (parenexpr
/// primaryexpr ::= symbol
/// primaryexpr ::= number
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,- primaryexpr
bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
SMLoc FirstTokenLoc = getLexer().getLoc();
AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
switch (FirstTokenKind) {
default:
return TokError("unknown token in expression");
// If we have an error assume that we've already handled it.
case AsmToken::Error:
return true;
case AsmToken::Exclaim:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
case AsmToken::Dollar:
case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
if (FirstTokenKind == AsmToken::Dollar) {
if (Lexer.getMAI().getDollarIsPC()) {
// This is a '$' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
MCSymbol *Sym = Ctx.CreateTempSymbol();
Out.EmitLabel(Sym);
Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
getContext());
EndLoc = FirstTokenLoc;
return false;
} else
return Error(FirstTokenLoc, "invalid token in expression");
}
}
EndLoc = SMLoc::getFromPointer(Identifier.end());
// This is a symbol reference.
StringRef SymbolName = Identifier;
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
std::pair<StringRef, StringRef> Split = Identifier.split('@');
// Lookup the symbol variant if used.
if (Split.first.size() != Identifier.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
if (Variant != MCSymbolRefExpr::VK_Invalid) {
SymbolName = Split.first;
} else if (MAI.doesAllowAtInName()) {
Variant = MCSymbolRefExpr::VK_None;
} else {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
}
}
MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
if (Variant)
return Error(EndLoc, "unexpected modifier on variable reference");
Res = Sym->getVariableValue();
return false;
}
// Otherwise create a symbol ref.
Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
return false;
}
case AsmToken::Integer: {
SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
Res = MCConstantExpr::Create(IntVal, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
// Look for 'b' or 'f' following an Integer as a directional label
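// For example, given a local label "1:", "1b" refers back to its most
// recent definition and "1f" refers forward to its next definition.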
if (Lexer.getKind() == AsmToken::Identifier) {
StringRef IDVal = getTok().getString();
// Lookup the symbol variant if used.
std::pair<StringRef, StringRef> Split = IDVal.split('@');
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
if (Split.first.size() != IDVal.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
if (Variant == MCSymbolRefExpr::VK_Invalid) {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
}
IDVal = Split.first;
}
if (IDVal == "f" || IDVal == "b") {
MCSymbol *Sym =
Ctx.GetDirectionalLocalSymbol(IntVal, IDVal == "f" ? 1 : 0);
Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat identifier.
}
}
return false;
}
case AsmToken::Real: {
APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
Res = MCConstantExpr::Create(IntVal, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
return false;
}
case AsmToken::Dot: {
// This is a '.' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
MCSymbol *Sym = Ctx.CreateTempSymbol();
Out.EmitLabel(Sym);
Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat identifier.
return false;
}
case AsmToken::LParen:
Lex(); // Eat the '('.
return parseParenExpr(Res, EndLoc);
case AsmToken::LBrac:
if (!PlatformParser->HasBracketExpressions())
return TokError("brackets expression not supported on this target");
Lex(); // Eat the '['.
return parseBracketExpr(Res, EndLoc);
case AsmToken::Minus:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreateMinus(Res, getContext());
return false;
case AsmToken::Plus:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreatePlus(Res, getContext());
return false;
case AsmToken::Tilde:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreateNot(Res, getContext());
return false;
}
}
bool AsmParser::parseExpression(const MCExpr *&Res) {
SMLoc EndLoc;
return parseExpression(Res, EndLoc);
}
const MCExpr *
AsmParser::applyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant) {
// Ask the target implementation about this expression first.
const MCExpr *NewE = getTargetParser().applyModifierToExpr(E, Variant, Ctx);
if (NewE)
return NewE;
// Recurse over the given expression, rebuilding it to apply the given variant
// if there is exactly one symbol.
switch (E->getKind()) {
case MCExpr::Target:
case MCExpr::Constant:
return 0;
case MCExpr::SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
TokError("invalid variant on expression '" + getTok().getIdentifier() +
"' (already modified)");
return E;
}
return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
}
case MCExpr::Unary: {
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant);
if (!Sub)
return 0;
return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
}
case MCExpr::Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
const MCExpr *LHS = applyModifierToExpr(BE->getLHS(), Variant);
const MCExpr *RHS = applyModifierToExpr(BE->getRHS(), Variant);
if (!LHS && !RHS)
return 0;
if (!LHS)
LHS = BE->getLHS();
if (!RHS)
RHS = BE->getRHS();
return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
}
}
llvm_unreachable("Invalid expression kind!");
}
/// \brief Parse an expression and return it.
///
/// expr ::= expr &&,|| expr -> lowest.
/// expr ::= expr |,^,&,! expr
/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
/// expr ::= expr <<,>> expr
/// expr ::= expr +,- expr
/// expr ::= expr *,/,% expr -> highest.
/// expr ::= primaryexpr
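///
/// For example, under these precedences "a + b * c" parses as
/// "a + (b * c)", and "a << b + c" parses as "a << (b + c)".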
///
bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
// Parse the expression.
Res = 0;
if (parsePrimaryExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc))
return true;
// As a special case, we support 'a op b @ modifier' by rewriting the
// expression to include the modifier. This is inefficient, but in general we
// expect users to use 'a@modifier op b'.
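// For illustration (a sketch): "sym1 + sym2 @GOTPCREL" is rewritten as if it
// had been written "sym1@GOTPCREL + sym2@GOTPCREL".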
if (Lexer.getKind() == AsmToken::At) {
Lex();
if (Lexer.isNot(AsmToken::Identifier))
return TokError("unexpected symbol modifier following '@'");
MCSymbolRefExpr::VariantKind Variant =
MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
if (Variant == MCSymbolRefExpr::VK_Invalid)
return TokError("invalid variant '" + getTok().getIdentifier() + "'");
const MCExpr *ModifiedRes = applyModifierToExpr(Res, Variant);
if (!ModifiedRes) {
return TokError("invalid modifier '" + getTok().getIdentifier() +
"' (no symbols present)");
}
Res = ModifiedRes;
Lex();
}
// Try to constant fold it up front, if possible.
int64_t Value;
if (Res->EvaluateAsAbsolute(Value))
Res = MCConstantExpr::Create(Value, getContext());
return false;
}
bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
Res = 0;
return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
}
bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
const MCExpr *Expr;
SMLoc StartLoc = Lexer.getLoc();
if (parseExpression(Expr))
return true;
if (!Expr->EvaluateAsAbsolute(Res))
return Error(StartLoc, "expected absolute expression");
return false;
}
static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
MCBinaryExpr::Opcode &Kind) {
switch (K) {
default:
return 0; // not a binop.
// Lowest Precedence: &&, ||
case AsmToken::AmpAmp:
Kind = MCBinaryExpr::LAnd;
return 1;
case AsmToken::PipePipe:
Kind = MCBinaryExpr::LOr;
return 1;
// Low Precedence: |, &, ^
//
// FIXME: gas seems to support '!' as an infix operator?
case AsmToken::Pipe:
Kind = MCBinaryExpr::Or;
return 2;
case AsmToken::Caret:
Kind = MCBinaryExpr::Xor;
return 2;
case AsmToken::Amp:
Kind = MCBinaryExpr::And;
return 2;
// Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
case AsmToken::EqualEqual:
Kind = MCBinaryExpr::EQ;
return 3;
case AsmToken::ExclaimEqual:
case AsmToken::LessGreater:
Kind = MCBinaryExpr::NE;
return 3;
case AsmToken::Less:
Kind = MCBinaryExpr::LT;
return 3;
case AsmToken::LessEqual:
Kind = MCBinaryExpr::LTE;
return 3;
case AsmToken::Greater:
Kind = MCBinaryExpr::GT;
return 3;
case AsmToken::GreaterEqual:
Kind = MCBinaryExpr::GTE;
return 3;
// Intermediate Precedence: <<, >>
case AsmToken::LessLess:
Kind = MCBinaryExpr::Shl;
return 4;
case AsmToken::GreaterGreater:
Kind = MCBinaryExpr::Shr;
return 4;
// High Intermediate Precedence: +, -
case AsmToken::Plus:
Kind = MCBinaryExpr::Add;
return 5;
case AsmToken::Minus:
Kind = MCBinaryExpr::Sub;
return 5;
// Highest Precedence: *, /, %
case AsmToken::Star:
Kind = MCBinaryExpr::Mul;
return 6;
case AsmToken::Slash:
Kind = MCBinaryExpr::Div;
return 6;
case AsmToken::Percent:
Kind = MCBinaryExpr::Mod;
return 6;
}
}
/// \brief Parse all binary operators with precedence >= 'Precedence'.
/// Res contains the LHS of the expression on input.
bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
SMLoc &EndLoc) {
while (1) {
MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
// If the next token is lower precedence than we are allowed to eat, return
// successfully with what we ate already.
if (TokPrec < Precedence)
return false;
Lex();
// Eat the next primary expression.
const MCExpr *RHS;
if (parsePrimaryExpr(RHS, EndLoc))
return true;
// If BinOp binds less tightly with RHS than the operator after RHS, let
// the pending operator take RHS as its LHS.
MCBinaryExpr::Opcode Dummy;
unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
return true;
// Merge LHS and RHS according to operator.
Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
}
}
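// For illustration: parsing "a + b * c" with Precedence == 1 consumes '+'
// (precedence 5), parses "b", sees that '*' (precedence 6) binds tighter,
// and recurses via parseBinOpRHS(6, ...) to fold "b * c" before merging it
// with "a".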
/// ParseStatement:
/// ::= EndOfStatement
/// ::= Label* Directive ...Operands... EndOfStatement
/// ::= Label* Identifier OperandList* EndOfStatement
bool AsmParser::parseStatement(ParseStatementInfo &Info) {
if (Lexer.is(AsmToken::EndOfStatement)) {
Out.AddBlankLine();
Lex();
return false;
}
// Statements always start with an identifier or are a full line comment.
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
int64_t LocalLabelVal = -1;
// A full line comment is a '#' as the first token.
if (Lexer.is(AsmToken::Hash))
return parseCppHashLineFilenameComment(IDLoc);
// Allow an integer followed by a ':' as a directional local label.
if (Lexer.is(AsmToken::Integer)) {
LocalLabelVal = getTok().getIntVal();
if (LocalLabelVal < 0) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
IDVal = "";
} else {
IDVal = getTok().getString();
Lex(); // Consume the integer token to be used as an identifier token.
if (Lexer.getKind() != AsmToken::Colon) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
}
}
} else if (Lexer.is(AsmToken::Dot)) {
// Treat '.' as a valid identifier in this context.
Lex();
IDVal = ".";
} else if (parseIdentifier(IDVal)) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
IDVal = "";
}
// Handle conditional assembly here before checking for skipping. We
// have to do this so that .endif isn't skipped in a ".if 0" block for
// example.
StringMap<DirectiveKind>::const_iterator DirKindIt =
DirectiveKindMap.find(IDVal);
DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
? DK_NO_DIRECTIVE
: DirKindIt->getValue();
switch (DirKind) {
default:
break;
case DK_IF:
return parseDirectiveIf(IDLoc);
case DK_IFB:
return parseDirectiveIfb(IDLoc, true);
case DK_IFNB:
return parseDirectiveIfb(IDLoc, false);
case DK_IFC:
return parseDirectiveIfc(IDLoc, true);
case DK_IFNC:
return parseDirectiveIfc(IDLoc, false);
case DK_IFDEF:
return parseDirectiveIfdef(IDLoc, true);
case DK_IFNDEF:
case DK_IFNOTDEF:
return parseDirectiveIfdef(IDLoc, false);
case DK_ELSEIF:
return parseDirectiveElseIf(IDLoc);
case DK_ELSE:
return parseDirectiveElse(IDLoc);
case DK_ENDIF:
return parseDirectiveEndIf(IDLoc);
}
// Ignore the statement if in the middle of inactive conditional
// (e.g. ".if 0").
if (TheCondState.Ignore) {
eatToEndOfStatement();
return false;
}
// FIXME: Recurse on local labels?
// See what kind of statement we have.
switch (Lexer.getKind()) {
case AsmToken::Colon: {
checkForValidSection();
// identifier ':' -> Label.
Lex();
// Diagnose attempt to use '.' as a label.
if (IDVal == ".")
return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
// Diagnose attempt to use a variable as a label.
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: This doesn't diagnose assignment to a symbol which has been
// implicitly marked as external.
MCSymbol *Sym;
if (LocalLabelVal == -1)
Sym = getContext().GetOrCreateSymbol(IDVal);
else
Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
if (!Sym->isUndefined() || Sym->isVariable())
return Error(IDLoc, "invalid symbol redefinition");
// Emit the label.
if (!ParsingInlineAsm)
Out.EmitLabel(Sym);
// If we are generating dwarf for assembly source files then gather the
// info to make a dwarf label entry for this label if needed.
if (getContext().getGenDwarfForAssembly())
MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
IDLoc);
getTargetParser().onLabelParsed(Sym);
// Consume any end of statement token, if present, to avoid spurious
// calls to AddBlankLine().
if (Lexer.is(AsmToken::EndOfStatement)) {
Lex();
if (Lexer.is(AsmToken::Eof))
return false;
}
return false;
}
case AsmToken::Equal:
// identifier '=' ... -> assignment statement
Lex();
return parseAssignment(IDVal, true);
default: // Normal instruction or directive.
break;
}
// If macros are enabled, check to see if this is a macro instantiation.
if (areMacrosEnabled())
if (const MCAsmMacro *M = lookupMacro(IDVal)) {
return handleMacroEntry(M, IDLoc);
}
// Otherwise, we have a normal instruction or directive.
// Directives start with "."
if (IDVal[0] == '.' && IDVal != ".") {
// There are several entities interested in parsing directives:
//
// 1. The target-specific assembly parser. Some directives are target
// specific or may potentially behave differently on certain targets.
// 2. Asm parser extensions. For example, platform-specific parsers
// (like the ELF parser) register themselves as extensions.
// 3. The generic directive parser implemented by this class. These are
// all the directives that behave in a target and platform independent
// manner, or at least have a default behavior that's shared between
// all targets and platforms.
// First query the target-specific parser. It will return 'true' if it
// isn't interested in this directive.
if (!getTargetParser().ParseDirective(ID))
return false;
// Next, check the extension directive map to see if any extension has
// registered itself to parse this directive.
std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
ExtensionDirectiveMap.lookup(IDVal);
if (Handler.first)
return (*Handler.second)(Handler.first, IDVal, IDLoc);
// Finally, if no one else is interested in this directive, it must be
// generic and familiar to this class.
switch (DirKind) {
default:
break;
case DK_SET:
case DK_EQU:
return parseDirectiveSet(IDVal, true);
case DK_EQUIV:
return parseDirectiveSet(IDVal, false);
case DK_ASCII:
return parseDirectiveAscii(IDVal, false);
case DK_ASCIZ:
case DK_STRING:
return parseDirectiveAscii(IDVal, true);
case DK_BYTE:
return parseDirectiveValue(1);
case DK_SHORT:
case DK_VALUE:
case DK_2BYTE:
return parseDirectiveValue(2);
case DK_LONG:
case DK_INT:
case DK_4BYTE:
return parseDirectiveValue(4);
case DK_QUAD:
case DK_8BYTE:
return parseDirectiveValue(8);
case DK_SINGLE:
case DK_FLOAT:
return parseDirectiveRealValue(APFloat::IEEEsingle);
case DK_DOUBLE:
return parseDirectiveRealValue(APFloat::IEEEdouble);
case DK_ALIGN: {
bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
return parseDirectiveAlign(IsPow2, /*ExprSize=*/1);
}
case DK_ALIGN32: {
bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
return parseDirectiveAlign(IsPow2, /*ExprSize=*/4);
}
case DK_BALIGN:
return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
case DK_BALIGNW:
return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
case DK_BALIGNL:
return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
case DK_P2ALIGN:
return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
case DK_P2ALIGNW:
return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
case DK_P2ALIGNL:
return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
case DK_ORG:
return parseDirectiveOrg();
case DK_FILL:
return parseDirectiveFill();
case DK_ZERO:
return parseDirectiveZero();
case DK_EXTERN:
eatToEndOfStatement(); // .extern is the default, ignore it.
return false;
case DK_GLOBL:
case DK_GLOBAL:
return parseDirectiveSymbolAttribute(MCSA_Global);
case DK_LAZY_REFERENCE:
return parseDirectiveSymbolAttribute(MCSA_LazyReference);
case DK_NO_DEAD_STRIP:
return parseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
case DK_SYMBOL_RESOLVER:
return parseDirectiveSymbolAttribute(MCSA_SymbolResolver);
case DK_PRIVATE_EXTERN:
return parseDirectiveSymbolAttribute(MCSA_PrivateExtern);
case DK_REFERENCE:
return parseDirectiveSymbolAttribute(MCSA_Reference);
case DK_WEAK_DEFINITION:
return parseDirectiveSymbolAttribute(MCSA_WeakDefinition);
case DK_WEAK_REFERENCE:
return parseDirectiveSymbolAttribute(MCSA_WeakReference);
case DK_WEAK_DEF_CAN_BE_HIDDEN:
return parseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
case DK_COMM:
case DK_COMMON:
return parseDirectiveComm(/*IsLocal=*/false);
case DK_LCOMM:
return parseDirectiveComm(/*IsLocal=*/true);
case DK_ABORT:
return parseDirectiveAbort();
case DK_INCLUDE:
return parseDirectiveInclude();
case DK_INCBIN:
return parseDirectiveIncbin();
case DK_CODE16:
case DK_CODE16GCC:
return TokError(Twine(IDVal) + " not supported yet");
case DK_REPT:
return parseDirectiveRept(IDLoc);
case DK_IRP:
return parseDirectiveIrp(IDLoc);
case DK_IRPC:
return parseDirectiveIrpc(IDLoc);
case DK_ENDR:
return parseDirectiveEndr(IDLoc);
case DK_BUNDLE_ALIGN_MODE:
return parseDirectiveBundleAlignMode();
case DK_BUNDLE_LOCK:
return parseDirectiveBundleLock();
case DK_BUNDLE_UNLOCK:
return parseDirectiveBundleUnlock();
case DK_SLEB128:
return parseDirectiveLEB128(true);
case DK_ULEB128:
return parseDirectiveLEB128(false);
case DK_SPACE:
case DK_SKIP:
return parseDirectiveSpace(IDVal);
case DK_FILE:
return parseDirectiveFile(IDLoc);
case DK_LINE:
return parseDirectiveLine();
case DK_LOC:
return parseDirectiveLoc();
case DK_STABS:
return parseDirectiveStabs();
case DK_CFI_SECTIONS:
return parseDirectiveCFISections();
case DK_CFI_STARTPROC:
return parseDirectiveCFIStartProc();
case DK_CFI_ENDPROC:
return parseDirectiveCFIEndProc();
case DK_CFI_DEF_CFA:
return parseDirectiveCFIDefCfa(IDLoc);
case DK_CFI_DEF_CFA_OFFSET:
return parseDirectiveCFIDefCfaOffset();
case DK_CFI_ADJUST_CFA_OFFSET:
return parseDirectiveCFIAdjustCfaOffset();
case DK_CFI_DEF_CFA_REGISTER:
return parseDirectiveCFIDefCfaRegister(IDLoc);
case DK_CFI_OFFSET:
return parseDirectiveCFIOffset(IDLoc);
case DK_CFI_REL_OFFSET:
return parseDirectiveCFIRelOffset(IDLoc);
case DK_CFI_PERSONALITY:
return parseDirectiveCFIPersonalityOrLsda(true);
case DK_CFI_LSDA:
return parseDirectiveCFIPersonalityOrLsda(false);
case DK_CFI_REMEMBER_STATE:
return parseDirectiveCFIRememberState();
case DK_CFI_RESTORE_STATE:
return parseDirectiveCFIRestoreState();
case DK_CFI_SAME_VALUE:
return parseDirectiveCFISameValue(IDLoc);
case DK_CFI_RESTORE:
return parseDirectiveCFIRestore(IDLoc);
case DK_CFI_ESCAPE:
return parseDirectiveCFIEscape();
case DK_CFI_SIGNAL_FRAME:
return parseDirectiveCFISignalFrame();
case DK_CFI_UNDEFINED:
return parseDirectiveCFIUndefined(IDLoc);
case DK_CFI_REGISTER:
return parseDirectiveCFIRegister(IDLoc);
case DK_CFI_WINDOW_SAVE:
return parseDirectiveCFIWindowSave();
case DK_MACROS_ON:
case DK_MACROS_OFF:
return parseDirectiveMacrosOnOff(IDVal);
case DK_MACRO:
return parseDirectiveMacro(IDLoc);
case DK_ENDM:
case DK_ENDMACRO:
return parseDirectiveEndMacro(IDVal);
case DK_PURGEM:
return parseDirectivePurgeMacro(IDLoc);
}
return Error(IDLoc, "unknown directive");
}
// __asm _emit or __asm __emit
if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
IDVal == "_EMIT" || IDVal == "__EMIT"))
return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
// __asm align
if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
return parseDirectiveMSAlign(IDLoc, Info);
checkForValidSection();
// Canonicalize the opcode to lower case.
std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, IDLoc,
Info.ParsedOperands);
Info.ParseError = HadError;
// Dump the parsed representation, if requested.
if (getShowParsedOperands()) {
SmallString<256> Str;
raw_svector_ostream OS(Str);
OS << "parsed instruction: [";
for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
if (i != 0)
OS << ", ";
Info.ParsedOperands[i]->print(OS);
}
OS << "]";
printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
}
// If we are generating dwarf for assembly source files and the current
// section is the initial text section then generate a .loc directive for
// the instruction.
if (!HadError && getContext().getGenDwarfForAssembly() &&
getContext().getGenDwarfSection() ==
getStreamer().getCurrentSection().first) {
unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
// If we previously parsed a cpp hash file line comment then make sure the
// current Dwarf File is for the CppHashFilename if not then emit the
// Dwarf File table for it and adjust the line number for the .loc.
const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
if (CppHashFilename.size() != 0) {
if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
CppHashFilename)
getStreamer().EmitDwarfFileDirective(
getContext().nextGenDwarfFileNumber(), StringRef(),
CppHashFilename);
// Since SrcMgr.FindLineNumber() is slow and messes up the SourceMgr's
// cache with the different Loc from the call above we save the last
// info we queried here with SrcMgr.FindLineNumber().
unsigned CppHashLocLineNo;
if (LastQueryIDLoc == CppHashLoc && LastQueryBuffer == CppHashBuf)
CppHashLocLineNo = LastQueryLine;
else {
CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc, CppHashBuf);
LastQueryLine = CppHashLocLineNo;
LastQueryIDLoc = CppHashLoc;
LastQueryBuffer = CppHashBuf;
}
Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo);
}
getStreamer().EmitDwarfLocDirective(
getContext().getGenDwarfFileNumber(), Line, 0,
DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
StringRef());
}
// If parsing succeeded, match the instruction.
if (!HadError) {
unsigned ErrorInfo;
HadError = getTargetParser().MatchAndEmitInstruction(
IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
ParsingInlineAsm);
}
// Don't skip the rest of the line, the instruction parser is responsible for
// that.
return false;
}
/// eatToEndOfLine uses the Lexer to eat the characters up to the end of the
/// line, since they may not be tokenizable into an end-of-line token.
void AsmParser::eatToEndOfLine() {
if (!Lexer.is(AsmToken::EndOfStatement))
Lexer.LexUntilEndOfLine();
// Eat EOL.
Lex();
}
/// parseCppHashLineFilenameComment:
/// ::= # number "filename"
/// or just as a full line comment if it doesn't have a number and a string.
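/// For example, a marker such as:
///   # 42 "foo.c"
/// records that the next source line corresponds to line 42 of "foo.c"; the
/// saved values are later used for diagnostics and dwarf line info.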
bool AsmParser::parseCppHashLineFilenameComment(const SMLoc &L) {
Lex(); // Eat the hash token.
if (getLexer().isNot(AsmToken::Integer)) {
// Consume the line, since in this case it is not a well-formed line
// directive; treat it as if it were simply a full line comment.
eatToEndOfLine();
return false;
}
int64_t LineNumber = getTok().getIntVal();
Lex();
if (getLexer().isNot(AsmToken::String)) {
eatToEndOfLine();
return false;
}
StringRef Filename = getTok().getString();
// Get rid of the enclosing quotes.
Filename = Filename.substr(1, Filename.size() - 2);
// Save the SMLoc, Filename and LineNumber for later use by diagnostics.
CppHashLoc = L;
CppHashFilename = Filename;
CppHashLineNumber = LineNumber;
CppHashBuf = CurBuffer;
// Ignore any trailing characters; they're just a comment.
eatToEndOfLine();
return false;
}
/// \brief Use the last parsed cpp hash line filename comment, if any, for
/// the Filename and LineNo in the diagnostic.
void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
const AsmParser *Parser = static_cast<const AsmParser *>(Context);
raw_ostream &OS = errs();
const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
const SMLoc &DiagLoc = Diag.getLoc();
int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
// Like SourceMgr::printMessage() we need to print the include stack if any
// before printing the message.
int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) {
SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
}
// If we have not parsed a cpp hash line filename comment or the source
// manager changed or buffer changed (like in a nested include) then just
// print the normal diagnostic using its Filename and LineNo.
if (!Parser->CppHashLineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
DiagBuf != CppHashBuf) {
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
else
Diag.print(0, OS);
return;
}
// Use the CppHashFilename and calculate a line number based on the
// CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for
// the diagnostic.
const std::string &Filename = Parser->CppHashFilename;
int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
int CppHashLocLineNo =
Parser->SrcMgr.FindLineNumber(Parser->CppHashLoc, CppHashBuf);
int LineNo =
Parser->CppHashLineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
Diag.getLineContents(), Diag.getRanges());
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
else
NewDiag.print(0, OS);
}
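// For illustration (hypothetical numbers): if "# 100 \"foo.c\"" appeared on
// physical line 10 and a diagnostic lands on physical line 13, the reported
// location becomes foo.c line 100 - 1 + (13 - 10) = 102.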
// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The
// difference being that that function accepts '@' as part of identifiers and
// we can't do that. AsmLexer.cpp should probably be changed to handle
// '@' as a special case when needed.
static bool isIdentifierChar(char c) {
return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
c == '.';
}
bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
const MCAsmMacroParameters &Parameters,
const MCAsmMacroArguments &A, const SMLoc &L) {
unsigned NParameters = Parameters.size();
if (NParameters != 0 && NParameters != A.size())
return Error(L, "Wrong number of arguments");
// A macro without parameters is handled differently on Darwin:
// gas accepts no arguments and does no substitutions
while (!Body.empty()) {
// Scan for the next substitution.
std::size_t End = Body.size(), Pos = 0;
for (; Pos != End; ++Pos) {
// Check for a substitution or escape.
if (!NParameters) {
// This macro has no parameters, look for $0, $1, etc.
if (Body[Pos] != '$' || Pos + 1 == End)
continue;
char Next = Body[Pos + 1];
if (Next == '$' || Next == 'n' ||
isdigit(static_cast<unsigned char>(Next)))
break;
} else {
// This macro has parameters, look for \foo, \bar, etc.
if (Body[Pos] == '\\' && Pos + 1 != End)
break;
}
}
// Add the prefix.
OS << Body.slice(0, Pos);
// Check if we reached the end.
if (Pos == End)
break;
if (!NParameters) {
switch (Body[Pos + 1]) {
// $$ => $
case '$':
OS << '$';
break;
// $n => number of arguments
case 'n':
OS << A.size();
break;
// $[0-9] => argument
default: {
// Missing arguments are ignored.
unsigned Index = Body[Pos + 1] - '0';
if (Index >= A.size())
break;
// Otherwise substitute with the token values, with spaces eliminated.
for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end();
it != ie; ++it)
OS << it->getString();
break;
}
}
Pos += 2;
} else {
unsigned I = Pos + 1;
while (isIdentifierChar(Body[I]) && I + 1 != End)
++I;
const char *Begin = Body.data() + Pos + 1;
StringRef Argument(Begin, I - (Pos + 1));
unsigned Index = 0;
for (; Index < NParameters; ++Index)
if (Parameters[Index].first == Argument)
break;
if (Index == NParameters) {
if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
Pos += 3;
else {
OS << '\\' << Argument;
Pos = I;
}
} else {
for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end();
it != ie; ++it)
if (it->getKind() == AsmToken::String)
OS << it->getStringContents();
else
OS << it->getString();
Pos += 1 + Argument.size();
}
}
// Update the scan point.
Body = Body.substr(Pos);
}
return false;
}
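// For illustration (a sketch): given ".macro inc reg" with body
// "add \reg, 1", the instantiation "inc x0" expands to "add x0, 1". For a
// parameterless macro, "$0", "$1", ... select positional arguments, "$n"
// expands to the argument count, and "$$" emits a literal '$'.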
MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB,
SMLoc EL, MemoryBuffer *I)
: TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
ExitLoc(EL) {}
static bool isOperator(AsmToken::TokenKind kind) {
switch (kind) {
default:
return false;
case AsmToken::Plus:
case AsmToken::Minus:
case AsmToken::Tilde:
case AsmToken::Slash:
case AsmToken::Star:
case AsmToken::Dot:
case AsmToken::Equal:
case AsmToken::EqualEqual:
case AsmToken::Pipe:
case AsmToken::PipePipe:
case AsmToken::Caret:
case AsmToken::Amp:
case AsmToken::AmpAmp:
case AsmToken::Exclaim:
case AsmToken::ExclaimEqual:
case AsmToken::Percent:
case AsmToken::Less:
case AsmToken::LessEqual:
case AsmToken::LessLess:
case AsmToken::LessGreater:
case AsmToken::Greater:
case AsmToken::GreaterEqual:
case AsmToken::GreaterGreater:
return true;
}
}
bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter) {
unsigned ParenLevel = 0;
unsigned AddTokens = 0;
// gas accepts arguments separated by whitespace, except on Darwin
if (!IsDarwin)
Lexer.setSkipSpace(false);
for (;;) {
if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) {
Lexer.setSkipSpace(true);
return TokError("unexpected token in macro instantiation");
}
if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
// Spaces and commas cannot be mixed to delimit parameters
if (ArgumentDelimiter == AsmToken::Eof)
ArgumentDelimiter = AsmToken::Comma;
else if (ArgumentDelimiter != AsmToken::Comma) {
Lexer.setSkipSpace(true);
return TokError("expected ' ' for macro argument separator");
}
break;
}
if (Lexer.is(AsmToken::Space)) {
Lex(); // Eat spaces
// Spaces can delimit parameters, but could also be part of an expression.
// If the token after a space is an operator, add the token and the next
// one into this argument
if (ArgumentDelimiter == AsmToken::Space ||
ArgumentDelimiter == AsmToken::Eof) {
if (isOperator(Lexer.getKind())) {
// Check to see whether the token is used as an operator,
// or part of an identifier
const char *NextChar = getTok().getEndLoc().getPointer();
if (*NextChar == ' ')
AddTokens = 2;
}
if (!AddTokens && ParenLevel == 0) {
if (ArgumentDelimiter == AsmToken::Eof &&
!isOperator(Lexer.getKind()))
ArgumentDelimiter = AsmToken::Space;
break;
}
}
}
// handleMacroEntry relies on not advancing the lexer here
// to be able to fill in the remaining default parameter values
if (Lexer.is(AsmToken::EndOfStatement))
break;
// Adjust the current parentheses level.
if (Lexer.is(AsmToken::LParen))
++ParenLevel;
else if (Lexer.is(AsmToken::RParen) && ParenLevel)
--ParenLevel;
// Append the token to the current argument list.
MA.push_back(getTok());
if (AddTokens)
AddTokens--;
Lex();
}
Lexer.setSkipSpace(true);
if (ParenLevel != 0)
return TokError("unbalanced parentheses in macro argument");
return false;
}
// Parse the macro instantiation arguments.
bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
MCAsmMacroArguments &A) {
const unsigned NParameters = M ? M->Parameters.size() : 0;
// Argument delimiter is initially unknown. It will be set by
// parseMacroArgument()
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
// Parse two kinds of macro invocations:
// - macros defined without any parameters accept an arbitrary number of them
// - macros defined with parameters accept at most that many of them
for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
++Parameter) {
MCAsmMacroArgument MA;
if (parseMacroArgument(MA, ArgumentDelimiter))
return true;
if (!MA.empty() || !NParameters)
A.push_back(MA);
else if (NParameters) {
if (!M->Parameters[Parameter].second.empty())
A.push_back(M->Parameters[Parameter].second);
}
// At the end of the statement, fill in remaining arguments that have
// default values. If there aren't any, then the next argument is
// required but missing
if (Lexer.is(AsmToken::EndOfStatement)) {
if (NParameters && Parameter < NParameters - 1) {
if (M->Parameters[Parameter + 1].second.empty())
return TokError("macro argument '" +
Twine(M->Parameters[Parameter + 1].first) +
"' is missing");
else
continue;
}
return false;
}
if (Lexer.is(AsmToken::Comma))
Lex();
}
return TokError("Too many arguments");
}
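// For illustration (a sketch, assuming the '=' default-value syntax in
// .macro definitions): with ".macro m a, b=3", the invocation "m 1" reaches
// EndOfStatement after one argument and the loop above fills in "3" for 'b'.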
const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) {
StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
return (I == MacroMap.end()) ? NULL : I->getValue();
}
void AsmParser::defineMacro(StringRef Name, const MCAsmMacro &Macro) {
MacroMap[Name] = new MCAsmMacro(Macro);
}
void AsmParser::undefineMacro(StringRef Name) {
StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
if (I != MacroMap.end()) {
delete I->getValue();
MacroMap.erase(I);
}
}
bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
// this, although we should protect against infinite loops.
if (ActiveMacros.size() == 20)
return TokError("macros cannot be nested more than 20 levels deep");
MCAsmMacroArguments A;
if (parseMacroArguments(M, A))
return true;
// Remove any trailing empty arguments. Do this after-the-fact as we have
// to keep empty arguments in the middle of the list or positionality
// gets off. e.g., "foo 1, , 2" vs. "foo 1, 2,"
while (!A.empty() && A.back().empty())
A.pop_back();
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
StringRef Body = M->Body;
raw_svector_ostream OS(Buf);
if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc()))
return true;
// We include the .endmacro in the buffer as our cue to exit the macro
// instantiation.
OS << ".endmacro\n";
MemoryBuffer *Instantiation =
MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
MacroInstantiation *MI = new MacroInstantiation(
M, NameLoc, CurBuffer, getTok().getLoc(), Instantiation);
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
Lex();
return false;
}
void AsmParser::handleMacroExit() {
// Jump to the EndOfStatement we should return to, and consume it.
jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
Lex();
// Pop the instantiation entry.
delete ActiveMacros.back();
ActiveMacros.pop_back();
}
static bool isUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
switch (Value->getKind()) {
case MCExpr::Binary: {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
return isUsedIn(Sym, BE->getLHS()) || isUsedIn(Sym, BE->getRHS());
}
case MCExpr::Target:
case MCExpr::Constant:
return false;
case MCExpr::SymbolRef: {
const MCSymbol &S =
static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
if (S.isVariable())
return isUsedIn(Sym, S.getVariableValue());
return &S == Sym;
}
case MCExpr::Unary:
return isUsedIn(Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
}
llvm_unreachable("Unknown expr kind!");
}
bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
bool NoDeadStrip) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
const MCExpr *Value;
if (parseExpression(Value))
return true;
// Note: we don't count b as used in "a = b". This is to allow
// a = b
// b = c
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in assignment");
// Error on assignment to '.'.
if (Name == ".") {
return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported "
"(use '.space' or '.org').)"));
}
// Eat the end of statement marker.
Lex();
// Validate that the LHS is allowed to be a variable (either it has not been
// used as a symbol, or it is an absolute symbol).
MCSymbol *Sym = getContext().LookupSymbol(Name);
if (Sym) {
// Diagnose assignment to a label.
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
if (isUsedIn(Sym, Value))
return Error(EqualLoc, "Recursive use of '" + Name + "'");
else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
; // Allow redefinitions of variables that haven't yet been used.
else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
return Error(EqualLoc, "redefinition of '" + Name + "'");
else if (!Sym->isVariable())
return Error(EqualLoc, "invalid assignment to '" + Name + "'");
else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
Name + "'");
// Don't count these checks as uses.
Sym->setUsed(false);
} else
Sym = getContext().GetOrCreateSymbol(Name);
// FIXME: Handle '.'.
// Do the assignment.
Out.EmitAssignment(Sym, Value);
if (NoDeadStrip)
Out.EmitSymbolAttribute(Sym, MCSA_NoDeadStrip);
return false;
}
/// parseIdentifier:
/// ::= identifier
/// ::= string
bool AsmParser::parseIdentifier(StringRef &Res) {
// The assembler has relaxed rules for accepting identifiers; in particular,
// we allow things like '.globl $foo' and '.def @feat.00', which would
// normally be separate tokens. At this level we have already lexed, so we
// cannot (currently) handle this as a context-dependent token; instead we
// detect adjacent tokens and return the combined identifier.
if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
SMLoc PrefixLoc = getLexer().getLoc();
// Consume the prefix character, and check for a following identifier.
Lex();
if (Lexer.isNot(AsmToken::Identifier))
return true;
// We have a '$' or '@' followed by an identifier; make sure they are adjacent.
if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer())
return true;
// Construct the joined identifier and consume the token.
Res =
StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
Lex();
return false;
}
if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
return true;
Res = getTok().getIdentifier();
Lex(); // Consume the identifier token.
return false;
}
/// parseDirectiveSet:
/// ::= .equ identifier ',' expression
/// ::= .equiv identifier ',' expression
/// ::= .set identifier ',' expression
bool AsmParser::parseDirectiveSet(StringRef IDVal, bool allow_redef) {
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier after '" + Twine(IDVal) + "'");
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '" + Twine(IDVal) + "'");
Lex();
return parseAssignment(Name, allow_redef, true);
}
bool AsmParser::parseEscapedString(std::string &Data) {
assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
Data = "";
StringRef Str = getTok().getStringContents();
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
if (Str[i] != '\\') {
Data += Str[i];
continue;
}
// Recognize escaped characters. Note that these escape semantics currently
// loosely follow Darwin 'as'. Notably, they don't support hex escapes.
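// For example (illustrative, per the decoding below): "\101" decodes to
// 'A', "\t" to a tab, and "\x41" is rejected as an unrecognized escape.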
++i;
if (i == e)
return TokError("unexpected backslash at end of string");
// Recognize octal sequences.
if ((unsigned)(Str[i] - '0') <= 7) {
// Consume up to three octal characters.
unsigned Value = Str[i] - '0';
if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) {
++i;
Value = Value * 8 + (Str[i] - '0');
if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) {
++i;
Value = Value * 8 + (Str[i] - '0');
}
}
if (Value > 255)
return TokError("invalid octal escape sequence (out of range)");
Data += (unsigned char)Value;
continue;
}
// Otherwise recognize individual escapes.
switch (Str[i]) {
default:
// Just reject invalid escape sequences for now.
return TokError("invalid escape sequence (unrecognized character)");
case 'b': Data += '\b'; break;
case 'f': Data += '\f'; break;
case 'n': Data += '\n'; break;
case 'r': Data += '\r'; break;
case 't': Data += '\t'; break;
case '"': Data += '"'; break;
case '\\': Data += '\\'; break;
}
}
return false;
}
/// parseDirectiveAscii:
/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
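/// For example (illustrative), '.asciz "foo", "bar"' emits the bytes of both
/// strings, each followed by a NUL terminator, while '.ascii "foo"' emits
/// the bytes without one.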
bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
checkForValidSection();
for (;;) {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '" + Twine(IDVal) + "' directive");
std::string Data;
if (parseEscapedString(Data))
return true;
getStreamer().EmitBytes(Data);
if (ZeroTerminated)
getStreamer().EmitBytes(StringRef("\0", 1));
Lex();
if (getLexer().is(AsmToken::EndOfStatement))
break;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
Lex();
}
}
Lex();
return false;
}
/// parseDirectiveValue
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
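/// For example (illustrative), '.byte 1, 2, 3' emits three 8-bit values; a
/// constant that does not fit the directive's size is rejected below.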
bool AsmParser::parseDirectiveValue(unsigned Size) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
checkForValidSection();
for (;;) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
if (parseExpression(Value))
return true;
// Special case constant expressions to match code generator.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
assert(Size <= 8 && "Invalid size");
uint64_t IntValue = MCE->getValue();
if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
return Error(ExprLoc, "literal value out of range for directive");
getStreamer().EmitIntValue(IntValue, Size);
} else
getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
}
}
Lex();
return false;
}
/// parseDirectiveRealValue
/// ::= (.single | .double) [ expression (, expression)* ]
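/// For example (illustrative), '.single 1.5, -2.25' emits two 32-bit IEEE
/// values; the literals 'inf', 'infinity' and 'nan' are also accepted.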
bool AsmParser::parseDirectiveRealValue(const fltSemantics &Semantics) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
checkForValidSection();
for (;;) {
// We don't truly support arithmetic on floating point expressions, so we
// have to manually parse unary prefixes.
bool IsNeg = false;
if (getLexer().is(AsmToken::Minus)) {
Lex();
IsNeg = true;
} else if (getLexer().is(AsmToken::Plus))
Lex();
if (getLexer().isNot(AsmToken::Integer) &&
getLexer().isNot(AsmToken::Real) &&
getLexer().isNot(AsmToken::Identifier))
return TokError("unexpected token in directive");
// Convert to an APFloat.
APFloat Value(Semantics);
StringRef IDVal = getTok().getString();
if (getLexer().is(AsmToken::Identifier)) {
if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
Value = APFloat::getInf(Semantics);
else if (!IDVal.compare_lower("nan"))
Value = APFloat::getNaN(Semantics, false, ~0);
else
return TokError("invalid floating point literal");
} else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
APFloat::opInvalidOp)
return TokError("invalid floating point literal");
if (IsNeg)
Value.changeSign();
// Consume the numeric token.
Lex();
// Emit the value as an integer.
APInt AsInt = Value.bitcastToAPInt();
getStreamer().EmitIntValue(AsInt.getLimitedValue(),
AsInt.getBitWidth() / 8);
if (getLexer().is(AsmToken::EndOfStatement))
break;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
}
}
Lex();
return false;
}
/// parseDirectiveZero
/// ::= .zero expression [ , expression ]
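/// For example (illustrative), '.zero 8' emits eight zero bytes and
/// '.zero 4, 0xff' emits four 0xff bytes.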
bool AsmParser::parseDirectiveZero() {
checkForValidSection();
int64_t NumBytes;
if (parseAbsoluteExpression(NumBytes))
return true;
int64_t Val = 0;
if (getLexer().is(AsmToken::Comma)) {
Lex();
if (parseAbsoluteExpression(Val))
return true;
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.zero' directive");
Lex();
getStreamer().EmitFill(NumBytes, Val);
return false;
}
/// parseDirectiveFill
/// ::= .fill expression [ , expression [ , expression ] ]
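/// For example (illustrative), '.fill 4, 2, 0x1234' emits the 16-bit value
/// 0x1234 four times; the size must be 1, 2, 4, or 8.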
bool AsmParser::parseDirectiveFill() {
checkForValidSection();
int64_t NumValues;
if (parseAbsoluteExpression(NumValues))
return true;
int64_t FillSize = 1;
int64_t FillExpr = 0;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lex();
if (parseAbsoluteExpression(FillSize))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lex();
if (parseAbsoluteExpression(FillExpr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.fill' directive");
Lex();
}
}
if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
for (uint64_t i = 0, e = NumValues; i != e; ++i)
getStreamer().EmitIntValue(FillExpr, FillSize);
return false;
}
/// parseDirectiveOrg
/// ::= .org expression [ , expression ]
bool AsmParser::parseDirectiveOrg() {
checkForValidSection();
const MCExpr *Offset;
SMLoc Loc = getTok().getLoc();
if (parseExpression(Offset))
return true;
// Parse optional fill expression.
int64_t FillExpr = 0;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.org' directive");
Lex();
if (parseAbsoluteExpression(FillExpr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.org' directive");
}
Lex();
// Only limited forms of relocatable expressions are accepted here; the
// offset has to be relative to the current section. The streamer will
// return 'true' if the expression wasn't evaluatable.
if (getStreamer().EmitValueToOffset(Offset, FillExpr))
return Error(Loc, "expected assembly-time absolute expression");
return false;
}
/// parseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
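/// For example (illustrative), '.p2align 3, 0x90, 7' requests 2^3-byte
/// alignment, filling with 0x90 but only if at most 7 bytes are needed.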
bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
checkForValidSection();
SMLoc AlignmentLoc = getLexer().getLoc();
int64_t Alignment;
if (parseAbsoluteExpression(Alignment))
return true;
SMLoc MaxBytesLoc;
bool HasFillExpr = false;
int64_t FillExpr = 0;
int64_t MaxBytesToFill = 0;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
// The fill expression can be omitted while specifying a maximum number of
// alignment bytes, e.g.:
// .align 3,,4
if (getLexer().isNot(AsmToken::Comma)) {
HasFillExpr = true;
if (parseAbsoluteExpression(FillExpr))
return true;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
MaxBytesLoc = getLexer().getLoc();
if (parseAbsoluteExpression(MaxBytesToFill))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
}
}
Lex();
if (!HasFillExpr)
FillExpr = 0;
// Compute alignment in bytes.
if (IsPow2) {
// FIXME: Diagnose overflow.
if (Alignment >= 32) {
Error(AlignmentLoc, "invalid alignment value");
Alignment = 31;
}
Alignment = 1ULL << Alignment;
} else {
// Reject alignments that aren't a power of two, for gas compatibility.
if (!isPowerOf2_64(Alignment))
Error(AlignmentLoc, "alignment must be a power of 2");
}
// Diagnose nonsensical max bytes to align.
if (MaxBytesLoc.isValid()) {
if (MaxBytesToFill < 1) {
Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
"many bytes, ignoring maximum bytes expression");
MaxBytesToFill = 0;
}
if (MaxBytesToFill >= Alignment) {
Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
"has no effect");
MaxBytesToFill = 0;
}
}
// Check whether we should use optimal code alignment for this .align
// directive.
bool UseCodeAlign = getStreamer().getCurrentSection().first->UseCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
MaxBytesToFill);
}
return false;
}
/// parseDirectiveFile
/// ::= .file [number] filename
/// ::= .file number directory filename
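/// For example (illustrative), '.file 1 "dir" "foo.s"' assigns DWARF file
/// number 1 to dir/foo.s, while '.file "foo.s"' just sets the file name.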
bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
SMLoc FileNumberLoc = getLexer().getLoc();
if (getLexer().is(AsmToken::Integer)) {
FileNumber = getTok().getIntVal();
Lex();
if (FileNumber < 1)
return TokError("file number less than one");
}
if (getLexer().isNot(AsmToken::String))
return TokError("unexpected token in '.file' directive");
// The first string is the filename unless a second string follows, in which
// case it is the directory. Allow the strings to contain escaped octal
// character sequences.
std::string Path = getTok().getString();
if (parseEscapedString(Path))
return true;
Lex();
StringRef Directory;
StringRef Filename;
std::string FilenameData;
if (getLexer().is(AsmToken::String)) {
if (FileNumber == -1)
return TokError("explicit path specified, but no file number");
if (parseEscapedString(FilenameData))
return true;
Filename = FilenameData;
Directory = Path;
Lex();
} else {
Filename = Path;
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.file' directive");
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
if (getContext().getGenDwarfForAssembly())
Error(DirectiveLoc,
"input can't have .file dwarf directives when -g is "
"used to generate dwarf debug info for assembly code");
if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename))
Error(FileNumberLoc, "file number already allocated");
}
return false;
}
/// parseDirectiveLine
/// ::= .line [number]
bool AsmParser::parseDirectiveLine() {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.line' directive");
int64_t LineNumber = getTok().getIntVal();
(void)LineNumber;
Lex();
// FIXME: Do something with the .line.
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.line' directive");
return false;
}
/// parseDirectiveLoc
/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
/// The first number is a file number, which must have been previously
/// assigned with a .file directive. The second number is the line number,
/// and the optional third number is a column position (zero if not
/// specified). The remaining optional items are .loc sub-directives.
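/// For example (illustrative), '.loc 1 42 7 prologue_end is_stmt 1' records
/// line 42, column 7 of file number 1 with the given flags.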
bool AsmParser::parseDirectiveLoc() {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
int64_t FileNumber = getTok().getIntVal();
if (FileNumber < 1)
return TokError("file number less than one in '.loc' directive");
if (!getContext().isValidDwarfFileNumber(FileNumber))
return TokError("unassigned file number in '.loc' directive");
Lex();
int64_t LineNumber = 0;
if (getLexer().is(AsmToken::Integer)) {
LineNumber = getTok().getIntVal();
if (LineNumber < 0)
return TokError("line number less than zero in '.loc' directive");
Lex();
}
int64_t ColumnPos = 0;
if (getLexer().is(AsmToken::Integer)) {
ColumnPos = getTok().getIntVal();
if (ColumnPos < 0)
return TokError("column position less than zero in '.loc' directive");
Lex();
}
unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
unsigned Isa = 0;
int64_t Discriminator = 0;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
StringRef Name;
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return TokError("unexpected token in '.loc' directive");
if (Name == "basic_block")
Flags |= DWARF2_FLAG_BASIC_BLOCK;
else if (Name == "prologue_end")
Flags |= DWARF2_FLAG_PROLOGUE_END;
else if (Name == "epilogue_begin")
Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
else if (Name == "is_stmt") {
Loc = getTok().getLoc();
const MCExpr *Value;
if (parseExpression(Value))
return true;
// The expression must be the constant 0 or 1.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
int Value = MCE->getValue();
if (Value == 0)
Flags &= ~DWARF2_FLAG_IS_STMT;
else if (Value == 1)
Flags |= DWARF2_FLAG_IS_STMT;
else
return Error(Loc, "is_stmt value not 0 or 1");
} else {
return Error(Loc, "is_stmt value not the constant value of 0 or 1");
}
} else if (Name == "isa") {
Loc = getTok().getLoc();
const MCExpr *Value;
if (parseExpression(Value))
return true;
// The expression must be a constant greater or equal to 0.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
int Value = MCE->getValue();
if (Value < 0)
return Error(Loc, "isa number less than zero");
Isa = Value;
} else {
return Error(Loc, "isa number not a constant value");
}
} else if (Name == "discriminator") {
if (parseAbsoluteExpression(Discriminator))
return true;
} else {
return Error(Loc, "unknown sub-directive in '.loc' directive");
}
if (getLexer().is(AsmToken::EndOfStatement))
break;
}
}
getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
Isa, Discriminator, StringRef());
return false;
}
/// parseDirectiveStabs
/// ::= .stabs string, number, number, number
bool AsmParser::parseDirectiveStabs() {
return TokError("unsupported directive '.stabs'");
}
/// parseDirectiveCFISections
/// ::= .cfi_sections section [, section]
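/// For example (illustrative), '.cfi_sections .eh_frame, .debug_frame'
/// requests emission of both frame sections.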
bool AsmParser::parseDirectiveCFISections() {
StringRef Name;
bool EH = false;
bool Debug = false;
if (parseIdentifier(Name))
return TokError("Expected an identifier");
if (Name == ".eh_frame")
EH = true;
else if (Name == ".debug_frame")
Debug = true;
if (getLexer().is(AsmToken::Comma)) {
Lex();
if (parseIdentifier(Name))
return TokError("Expected an identifier");
if (Name == ".eh_frame")
EH = true;
else if (Name == ".debug_frame")
Debug = true;
}
getStreamer().EmitCFISections(EH, Debug);
return false;
}
/// parseDirectiveCFIStartProc
/// ::= .cfi_startproc
bool AsmParser::parseDirectiveCFIStartProc() {
getStreamer().EmitCFIStartProc();
return false;
}
/// parseDirectiveCFIEndProc
/// ::= .cfi_endproc
bool AsmParser::parseDirectiveCFIEndProc() {
getStreamer().EmitCFIEndProc();
return false;
}
/// \brief Parse a register name or number.
bool AsmParser::parseRegisterOrRegisterNumber(int64_t &Register,
SMLoc DirectiveLoc) {
unsigned RegNo;
if (getLexer().isNot(AsmToken::Integer)) {
if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
return true;
Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
} else
return parseAbsoluteExpression(Register);
return false;
}
/// parseDirectiveCFIDefCfa
/// ::= .cfi_def_cfa register, offset
bool AsmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
int64_t Offset = 0;
if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIDefCfa(Register, Offset);
return false;
}
/// parseDirectiveCFIDefCfaOffset
/// ::= .cfi_def_cfa_offset offset
bool AsmParser::parseDirectiveCFIDefCfaOffset() {
int64_t Offset = 0;
if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIDefCfaOffset(Offset);
return false;
}
/// parseDirectiveCFIRegister
/// ::= .cfi_register register, register
bool AsmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
int64_t Register1 = 0;
if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
int64_t Register2 = 0;
if (parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
return true;
getStreamer().EmitCFIRegister(Register1, Register2);
return false;
}
/// parseDirectiveCFIWindowSave
/// ::= .cfi_window_save
bool AsmParser::parseDirectiveCFIWindowSave() {
getStreamer().EmitCFIWindowSave();
return false;
}
/// parseDirectiveCFIAdjustCfaOffset
/// ::= .cfi_adjust_cfa_offset adjustment
bool AsmParser::parseDirectiveCFIAdjustCfaOffset() {
int64_t Adjustment = 0;
if (parseAbsoluteExpression(Adjustment))
return true;
getStreamer().EmitCFIAdjustCfaOffset(Adjustment);
return false;
}
/// parseDirectiveCFIDefCfaRegister
/// ::= .cfi_def_cfa_register register
bool AsmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIDefCfaRegister(Register);
return false;
}
/// parseDirectiveCFIOffset
/// ::= .cfi_offset register, offset
bool AsmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
int64_t Register = 0;
int64_t Offset = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIOffset(Register, Offset);
return false;
}
/// parseDirectiveCFIRelOffset
/// ::= .cfi_rel_offset register, offset
bool AsmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
int64_t Offset = 0;
if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIRelOffset(Register, Offset);
return false;
}
static bool isValidEncoding(int64_t Encoding) {
if (Encoding & ~0xff)
return false;
if (Encoding == dwarf::DW_EH_PE_omit)
return true;
const unsigned Format = Encoding & 0xf;
if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
return false;
const unsigned Application = Encoding & 0x70;
if (Application != dwarf::DW_EH_PE_absptr &&
Application != dwarf::DW_EH_PE_pcrel)
return false;
return true;
}
/// parseDirectiveCFIPersonalityOrLsda
/// IsPersonality true for cfi_personality, false for cfi_lsda
/// ::= .cfi_personality encoding, [symbol_name]
/// ::= .cfi_lsda encoding, [symbol_name]
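/// For example (illustrative, with a hypothetical personality symbol),
/// '.cfi_personality 0x9b, __my_personality' uses encoding 0x9b for the
/// named symbol; the encoding is validated by isValidEncoding() above.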
bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
int64_t Encoding = 0;
if (parseAbsoluteExpression(Encoding))
return true;
if (Encoding == dwarf::DW_EH_PE_omit)
return false;
if (!isValidEncoding(Encoding))
return TokError("unsupported encoding.");
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
if (IsPersonality)
getStreamer().EmitCFIPersonality(Sym, Encoding);
else
getStreamer().EmitCFILsda(Sym, Encoding);
return false;
}
/// parseDirectiveCFIRememberState
/// ::= .cfi_remember_state
bool AsmParser::parseDirectiveCFIRememberState() {
getStreamer().EmitCFIRememberState();
return false;
}
/// parseDirectiveCFIRestoreState
/// ::= .cfi_restore_state
bool AsmParser::parseDirectiveCFIRestoreState() {
getStreamer().EmitCFIRestoreState();
return false;
}
/// parseDirectiveCFISameValue
/// ::= .cfi_same_value register
bool AsmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFISameValue(Register);
return false;
}
/// parseDirectiveCFIRestore
/// ::= .cfi_restore register
bool AsmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIRestore(Register);
return false;
}
/// parseDirectiveCFIEscape
/// ::= .cfi_escape expression[,...]
bool AsmParser::parseDirectiveCFIEscape() {
std::string Values;
int64_t CurrValue;
if (parseAbsoluteExpression(CurrValue))
return true;
Values.push_back((uint8_t)CurrValue);
while (getLexer().is(AsmToken::Comma)) {
Lex();
if (parseAbsoluteExpression(CurrValue))
return true;
Values.push_back((uint8_t)CurrValue);
}
getStreamer().EmitCFIEscape(Values);
return false;
}
/// parseDirectiveCFISignalFrame
/// ::= .cfi_signal_frame
bool AsmParser::parseDirectiveCFISignalFrame() {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
"unexpected token in '.cfi_signal_frame'");
getStreamer().EmitCFISignalFrame();
return false;
}
/// parseDirectiveCFIUndefined
/// ::= .cfi_undefined register
bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIUndefined(Register);
return false;
}
/// parseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
bool AsmParser::parseDirectiveMacrosOnOff(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
"unexpected token in '" + Directive + "' directive");
setMacrosEnabled(Directive == ".macros_on");
return false;
}
/// parseDirectiveMacro
/// ::= .macro name [parameters]
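/// For example (illustrative):
///   .macro load reg, val=0
///   mov \val, \reg
///   .endm
/// defines a macro with a named parameter and a defaulted one; the body
/// references them as '\reg' and '\val'.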
bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier in '.macro' directive");
MCAsmMacroParameters Parameters;
// The argument delimiter is initially unknown. It will be set by
// parseMacroArgument().
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
MCAsmMacroParameter Parameter;
if (parseIdentifier(Parameter.first))
return TokError("expected identifier in '.macro' directive");
if (getLexer().is(AsmToken::Equal)) {
Lex();
if (parseMacroArgument(Parameter.second, ArgumentDelimiter))
return true;
}
Parameters.push_back(Parameter);
if (getLexer().is(AsmToken::Comma))
Lex();
else if (getLexer().is(AsmToken::EndOfStatement))
break;
}
}
// Eat the end of statement.
Lex();
AsmToken EndToken, StartToken = getTok();
// Lex the macro definition.
for (;;) {
// Check whether we have reached the end of the file.
if (getLexer().is(AsmToken::Eof))
return Error(DirectiveLoc, "no matching '.endmacro' in definition");
// Otherwise, check whether we have reached the .endmacro.
if (getLexer().is(AsmToken::Identifier) &&
(getTok().getIdentifier() == ".endm" ||
getTok().getIdentifier() == ".endmacro")) {
EndToken = getTok();
Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + EndToken.getIdentifier() +
"' directive");
break;
}
// Otherwise, scan to the end of the statement.
eatToEndOfStatement();
}
if (lookupMacro(Name)) {
return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
}
const char *BodyStart = StartToken.getLoc().getPointer();
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
checkForBadMacro(DirectiveLoc, Name, Body, Parameters);
defineMacro(Name, MCAsmMacro(Name, Body, Parameters));
return false;
}
/// checkForBadMacro
///
/// With the support added for named parameters there may be code out there
/// that is transitioning from positional parameters. In versions of gas that
/// did not support named parameters they would be ignored on the macro
/// definition. But to support both styles of parameters this is not
/// possible, so if a macro definition has named parameters but does not use
/// them, and has what appear to be positional parameters (strings like $1,
/// $2, ... and $n), then issue a warning that the positional parameters
/// found in the body will have no effect. The hope is that the developer
/// will either remove the named parameters from the macro definition, so
/// the positional parameters get used if that was what was intended, or
/// change the macro to use the named parameters. It is possible this warning
/// will trigger when none of the named parameters are used and the strings
/// like $1 are in fact simply meant to be passed through unchanged.
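/// For example (illustrative), defining '.macro foo bar' with a body that
/// uses '$1' but never '\bar' would trigger this warning.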
void AsmParser::checkForBadMacro(SMLoc DirectiveLoc, StringRef Name,
StringRef Body,
MCAsmMacroParameters Parameters) {
// If this macro is not defined with named parameters the warning we are
// checking for here doesn't apply.
unsigned NParameters = Parameters.size();
if (NParameters == 0)
return;
bool NamedParametersFound = false;
bool PositionalParametersFound = false;
// Look at the body of the macro for uses of both the named parameters and
// what are likely to be positional parameters. This mirrors what
// expandMacro() does when it finds the parameters in the body.
while (!Body.empty()) {
// Scan for the next possible parameter.
std::size_t End = Body.size(), Pos = 0;
for (; Pos != End; ++Pos) {
// Check for a substitution or escape.
// This macro is defined with parameters, look for \foo, \bar, etc.
if (Body[Pos] == '\\' && Pos + 1 != End)
break;
// This macro should have parameters, but look for $0, $1, ..., $n too.
if (Body[Pos] != '$' || Pos + 1 == End)
continue;
char Next = Body[Pos + 1];
if (Next == '$' || Next == 'n' ||
isdigit(static_cast<unsigned char>(Next)))
break;
}
// Check if we reached the end.
if (Pos == End)
break;
if (Body[Pos] == '$') {
switch (Body[Pos + 1]) {
// $$ => $
case '$':
break;
// $n => number of arguments
case 'n':
PositionalParametersFound = true;
break;
// $[0-9] => argument
default: {
PositionalParametersFound = true;
break;
}
}
Pos += 2;
} else {
unsigned I = Pos + 1;
while (isIdentifierChar(Body[I]) && I + 1 != End)
++I;
const char *Begin = Body.data() + Pos + 1;
StringRef Argument(Begin, I - (Pos + 1));
unsigned Index = 0;
for (; Index < NParameters; ++Index)
if (Parameters[Index].first == Argument)
break;
if (Index == NParameters) {
if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
Pos += 3;
else {
Pos = I;
}
} else {
NamedParametersFound = true;
Pos += 1 + Argument.size();
}
}
// Update the scan point.
Body = Body.substr(Pos);
}
if (!NamedParametersFound && PositionalParametersFound)
Warning(DirectiveLoc, "macro defined with named parameters which are not "
"used in macro body, possible positional parameter "
"found in body which will have no effect");
}
/// parseDirectiveEndMacro
/// ::= .endm
/// ::= .endmacro
bool AsmParser::parseDirectiveEndMacro(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
// If we are inside a macro instantiation, terminate the current
// instantiation.
if (isInsideMacroInstantiation()) {
handleMacroExit();
return false;
}
// Otherwise, this .endmacro is a stray entry in the file; well formed
// .endmacro directives are handled during the macro definition parsing.
return TokError("unexpected '" + Directive + "' in file, "
"no current macro definition");
}
/// parseDirectivePurgeMacro
/// ::= .purgem
bool AsmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier in '.purgem' directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.purgem' directive");
if (!lookupMacro(Name))
return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
undefineMacro(Name);
return false;
}
/// parseDirectiveBundleAlignMode
/// ::= {.bundle_align_mode} expression
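/// For example (illustrative), '.bundle_align_mode 5' requests 32-byte
/// (2^5) instruction bundles.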
bool AsmParser::parseDirectiveBundleAlignMode() {
checkForValidSection();
// Expect a single argument: an expression that evaluates to a constant
// in the inclusive range 0-30.
SMLoc ExprLoc = getLexer().getLoc();
int64_t AlignSizePow2;
if (parseAbsoluteExpression(AlignSizePow2))
return true;
else if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token after expression in"
" '.bundle_align_mode' directive");
else if (AlignSizePow2 < 0 || AlignSizePow2 > 30)
return Error(ExprLoc,
"invalid bundle alignment size (expected between 0 and 30)");
Lex();
// Because of AlignSizePow2's verified range we can safely truncate it to
// unsigned.
getStreamer().EmitBundleAlignMode(static_cast<unsigned>(AlignSizePow2));
return false;
}
/// parseDirectiveBundleLock
/// ::= {.bundle_lock} [align_to_end]
bool AsmParser::parseDirectiveBundleLock() {
checkForValidSection();
bool AlignToEnd = false;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
StringRef Option;
SMLoc Loc = getTok().getLoc();
const char *kInvalidOptionError =
"invalid option for '.bundle_lock' directive";
if (parseIdentifier(Option))
return Error(Loc, kInvalidOptionError);
if (Option != "align_to_end")
return Error(Loc, kInvalidOptionError);
else if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(Loc,
"unexpected token after '.bundle_lock' directive option");
AlignToEnd = true;
}
Lex();
getStreamer().EmitBundleLock(AlignToEnd);
return false;
}
/// parseDirectiveBundleUnlock
/// ::= {.bundle_unlock}
bool AsmParser::parseDirectiveBundleUnlock() {
checkForValidSection();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.bundle_unlock' directive");
Lex();
getStreamer().EmitBundleUnlock();
return false;
}
/// parseDirectiveSpace
/// ::= (.skip | .space) expression [ , expression ]
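/// For example (illustrative), '.space 16' emits 16 zero bytes and
/// '.skip 16, 0x90' emits 16 bytes of 0x90.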
bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
checkForValidSection();
int64_t NumBytes;
if (parseAbsoluteExpression(NumBytes))
return true;
int64_t FillExpr = 0;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
Lex();
if (parseAbsoluteExpression(FillExpr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
}
Lex();
if (NumBytes <= 0)
return TokError("invalid number of bytes in '" + Twine(IDVal) +
"' directive");
// FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
getStreamer().EmitFill(NumBytes, FillExpr);
return false;
}
/// parseDirectiveLEB128
/// ::= (.sleb128 | .uleb128) expression
bool AsmParser::parseDirectiveLEB128(bool Signed) {
checkForValidSection();
const MCExpr *Value;
if (parseExpression(Value))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
if (Signed)
getStreamer().EmitSLEB128Value(Value);
else
getStreamer().EmitULEB128Value(Value);
return false;
}
/// parseDirectiveSymbolAttribute
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
StringRef Name;
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return Error(Loc, "expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
// Assembler local symbols don't make any sense here. Complain loudly.
if (Sym->isTemporary())
return Error(Loc, "non-local symbol required in directive");
if (!getStreamer().EmitSymbolAttribute(Sym, Attr))
return Error(Loc, "unable to emit symbol attribute");
if (getLexer().is(AsmToken::EndOfStatement))
break;
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
}
}
Lex();
return false;
}
/// parseDirectiveComm
/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
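/// For example (illustrative), '.comm buf, 64, 4' declares a 64-byte common
/// symbol named buf; whether the 4 means 2^4-byte or 4-byte alignment is
/// target dependent, as handled below.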
bool AsmParser::parseDirectiveComm(bool IsLocal) {
checkForValidSection();
SMLoc IDLoc = getLexer().getLoc();
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
int64_t Size;
SMLoc SizeLoc = getLexer().getLoc();
if (parseAbsoluteExpression(Size))
return true;
int64_t Pow2Alignment = 0;
SMLoc Pow2AlignmentLoc;
if (getLexer().is(AsmToken::Comma)) {
Lex();
Pow2AlignmentLoc = getLexer().getLoc();
if (parseAbsoluteExpression(Pow2Alignment))
return true;
LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
if (IsLocal && LCOMM == LCOMM::NoAlignment)
return Error(Pow2AlignmentLoc, "alignment not supported on this target");
// If this target takes alignments in bytes (not log) validate and convert.
if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
(IsLocal && LCOMM == LCOMM::ByteAlignment)) {
if (!isPowerOf2_64(Pow2Alignment))
return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
Pow2Alignment = Log2_64(Pow2Alignment);
}
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.comm' or '.lcomm' directive");
Lex();
// NOTE: a size of zero for a .comm should create an undefined symbol
// but a size of zero for a .lcomm creates a bss symbol of size zero.
if (Size < 0)
return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
"be less than zero");
// NOTE: The alignment in the directive is a power of 2 value, the assembler
// may internally end up wanting an alignment in bytes.
// FIXME: Diagnose overflow.
if (Pow2Alignment < 0)
return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
"alignment, can't be less than zero");
if (!Sym->isUndefined())
return Error(IDLoc, "invalid symbol redefinition");
// Create the Symbol as a common or local common with Size and Pow2Alignment
if (IsLocal) {
getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
return false;
}
getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
return false;
}
/// parseDirectiveAbort
/// ::= .abort [... message ...]
bool AsmParser::parseDirectiveAbort() {
// FIXME: Use loc from directive.
SMLoc Loc = getLexer().getLoc();
StringRef Str = parseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.abort' directive");
Lex();
if (Str.empty())
Error(Loc, ".abort detected. Assembly stopping.");
else
Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
// FIXME: Actually abort assembly here.
return false;
}
/// parseDirectiveInclude
/// ::= .include "filename"
bool AsmParser::parseDirectiveInclude() {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.include' directive");
// Allow the strings to have escaped octal character sequence.
std::string Filename;
if (parseEscapedString(Filename))
return true;
SMLoc IncludeLoc = getLexer().getLoc();
Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.include' directive");
// Attempt to switch the lexer to the included file before consuming the end
// of statement to avoid losing it when we switch.
if (enterIncludeFile(Filename)) {
Error(IncludeLoc, "Could not find include file '" + Filename + "'");
return true;
}
return false;
}
/// parseDirectiveIncbin
/// ::= .incbin "filename"
bool AsmParser::parseDirectiveIncbin() {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.incbin' directive");
// Allow the strings to have escaped octal character sequence.
std::string Filename;
if (parseEscapedString(Filename))
return true;
SMLoc IncbinLoc = getLexer().getLoc();
Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.incbin' directive");
// Attempt to process the included file.
if (processIncbinFile(Filename)) {
Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
return true;
}
return false;
}
/// parseDirectiveIf
/// ::= .if expression
bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
eatToEndOfStatement();
} else {
int64_t ExprValue;
if (parseAbsoluteExpression(ExprValue))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.if' directive");
Lex();
TheCondState.CondMet = ExprValue;
TheCondState.Ignore = !TheCondState.CondMet;
}
return false;
}
/// parseDirectiveIfb
/// ::= .ifb string
bool AsmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
eatToEndOfStatement();
} else {
StringRef Str = parseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.ifb' directive");
Lex();
TheCondState.CondMet = ExpectBlank == Str.empty();
TheCondState.Ignore = !TheCondState.CondMet;
}
return false;
}
/// parseDirectiveIfc
/// ::= .ifc string1, string2
bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
eatToEndOfStatement();
} else {
StringRef Str1 = parseStringToComma();
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.ifc' directive");
Lex();
StringRef Str2 = parseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.ifc' directive");
Lex();
TheCondState.CondMet = ExpectEqual == (Str1 == Str2);
TheCondState.Ignore = !TheCondState.CondMet;
}
return false;
}
/// parseDirectiveIfdef
/// ::= .ifdef symbol
bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
StringRef Name;
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
eatToEndOfStatement();
} else {
if (parseIdentifier(Name))
return TokError("expected identifier after '.ifdef'");
Lex();
MCSymbol *Sym = getContext().LookupSymbol(Name);
if (expect_defined)
TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
else
TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
TheCondState.Ignore = !TheCondState.CondMet;
}
return false;
}
/// parseDirectiveElseIf
/// ::= .elseif expression
bool AsmParser::parseDirectiveElseIf(SMLoc DirectiveLoc) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
" an .elseif");
TheCondState.TheCond = AsmCond::ElseIfCond;
bool LastIgnoreState = false;
if (!TheCondStack.empty())
LastIgnoreState = TheCondStack.back().Ignore;
if (LastIgnoreState || TheCondState.CondMet) {
TheCondState.Ignore = true;
eatToEndOfStatement();
} else {
int64_t ExprValue;
if (parseAbsoluteExpression(ExprValue))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.elseif' directive");
Lex();
TheCondState.CondMet = ExprValue;
TheCondState.Ignore = !TheCondState.CondMet;
}
return false;
}
/// parseDirectiveElse
/// ::= .else
bool AsmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.else' directive");
Lex();
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
".elseif");
TheCondState.TheCond = AsmCond::ElseCond;
bool LastIgnoreState = false;
if (!TheCondStack.empty())
LastIgnoreState = TheCondStack.back().Ignore;
if (LastIgnoreState || TheCondState.CondMet)
TheCondState.Ignore = true;
else
TheCondState.Ignore = false;
return false;
}
/// parseDirectiveEndIf
/// ::= .endif
bool AsmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.endif' directive");
Lex();
if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
".else");
if (!TheCondStack.empty()) {
TheCondState = TheCondStack.back();
TheCondStack.pop_back();
}
return false;
}
void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".set"] = DK_SET;
DirectiveKindMap[".equ"] = DK_EQU;
DirectiveKindMap[".equiv"] = DK_EQUIV;
DirectiveKindMap[".ascii"] = DK_ASCII;
DirectiveKindMap[".asciz"] = DK_ASCIZ;
DirectiveKindMap[".string"] = DK_STRING;
DirectiveKindMap[".byte"] = DK_BYTE;
DirectiveKindMap[".short"] = DK_SHORT;
DirectiveKindMap[".value"] = DK_VALUE;
DirectiveKindMap[".2byte"] = DK_2BYTE;
DirectiveKindMap[".long"] = DK_LONG;
DirectiveKindMap[".int"] = DK_INT;
DirectiveKindMap[".4byte"] = DK_4BYTE;
DirectiveKindMap[".quad"] = DK_QUAD;
DirectiveKindMap[".8byte"] = DK_8BYTE;
DirectiveKindMap[".single"] = DK_SINGLE;
DirectiveKindMap[".float"] = DK_FLOAT;
DirectiveKindMap[".double"] = DK_DOUBLE;
DirectiveKindMap[".align"] = DK_ALIGN;
DirectiveKindMap[".align32"] = DK_ALIGN32;
DirectiveKindMap[".balign"] = DK_BALIGN;
DirectiveKindMap[".balignw"] = DK_BALIGNW;
DirectiveKindMap[".balignl"] = DK_BALIGNL;
DirectiveKindMap[".p2align"] = DK_P2ALIGN;
DirectiveKindMap[".p2alignw"] = DK_P2ALIGNW;
DirectiveKindMap[".p2alignl"] = DK_P2ALIGNL;
DirectiveKindMap[".org"] = DK_ORG;
DirectiveKindMap[".fill"] = DK_FILL;
DirectiveKindMap[".zero"] = DK_ZERO;
DirectiveKindMap[".extern"] = DK_EXTERN;
DirectiveKindMap[".globl"] = DK_GLOBL;
DirectiveKindMap[".global"] = DK_GLOBAL;
DirectiveKindMap[".lazy_reference"] = DK_LAZY_REFERENCE;
DirectiveKindMap[".no_dead_strip"] = DK_NO_DEAD_STRIP;
DirectiveKindMap[".symbol_resolver"] = DK_SYMBOL_RESOLVER;
DirectiveKindMap[".private_extern"] = DK_PRIVATE_EXTERN;
DirectiveKindMap[".reference"] = DK_REFERENCE;
DirectiveKindMap[".weak_definition"] = DK_WEAK_DEFINITION;
DirectiveKindMap[".weak_reference"] = DK_WEAK_REFERENCE;
DirectiveKindMap[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN;
DirectiveKindMap[".comm"] = DK_COMM;
DirectiveKindMap[".common"] = DK_COMMON;
DirectiveKindMap[".lcomm"] = DK_LCOMM;
DirectiveKindMap[".abort"] = DK_ABORT;
DirectiveKindMap[".include"] = DK_INCLUDE;
DirectiveKindMap[".incbin"] = DK_INCBIN;
DirectiveKindMap[".code16"] = DK_CODE16;
DirectiveKindMap[".code16gcc"] = DK_CODE16GCC;
DirectiveKindMap[".rept"] = DK_REPT;
DirectiveKindMap[".irp"] = DK_IRP;
DirectiveKindMap[".irpc"] = DK_IRPC;
DirectiveKindMap[".endr"] = DK_ENDR;
DirectiveKindMap[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE;
DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK;
DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK;
DirectiveKindMap[".if"] = DK_IF;
DirectiveKindMap[".ifb"] = DK_IFB;
DirectiveKindMap[".ifnb"] = DK_IFNB;
DirectiveKindMap[".ifc"] = DK_IFC;
DirectiveKindMap[".ifnc"] = DK_IFNC;
DirectiveKindMap[".ifdef"] = DK_IFDEF;
DirectiveKindMap[".ifndef"] = DK_IFNDEF;
DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF;
DirectiveKindMap[".elseif"] = DK_ELSEIF;
DirectiveKindMap[".else"] = DK_ELSE;
DirectiveKindMap[".endif"] = DK_ENDIF;
DirectiveKindMap[".skip"] = DK_SKIP;
DirectiveKindMap[".space"] = DK_SPACE;
DirectiveKindMap[".file"] = DK_FILE;
DirectiveKindMap[".line"] = DK_LINE;
DirectiveKindMap[".loc"] = DK_LOC;
DirectiveKindMap[".stabs"] = DK_STABS;
DirectiveKindMap[".sleb128"] = DK_SLEB128;
DirectiveKindMap[".uleb128"] = DK_ULEB128;
DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
DirectiveKindMap[".macro"] = DK_MACRO;
DirectiveKindMap[".endm"] = DK_ENDM;
DirectiveKindMap[".endmacro"] = DK_ENDMACRO;
DirectiveKindMap[".purgem"] = DK_PURGEM;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
AsmToken EndToken, StartToken = getTok();
unsigned NestLevel = 0;
for (;;) {
// Check whether we have reached the end of the file.
if (getLexer().is(AsmToken::Eof)) {
Error(DirectiveLoc, "no matching '.endr' in definition");
return 0;
}
if (Lexer.is(AsmToken::Identifier) &&
(getTok().getIdentifier() == ".rept")) {
++NestLevel;
}
// Otherwise, check whether we have reached the .endr.
if (Lexer.is(AsmToken::Identifier) && getTok().getIdentifier() == ".endr") {
if (NestLevel == 0) {
EndToken = getTok();
Lex();
if (Lexer.isNot(AsmToken::EndOfStatement)) {
TokError("unexpected token in '.endr' directive");
return 0;
}
break;
}
--NestLevel;
}
// Otherwise, scan till the end of the statement.
eatToEndOfStatement();
}
const char *BodyStart = StartToken.getLoc().getPointer();
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
// We Are Anonymous.
StringRef Name;
MCAsmMacroParameters Parameters;
MacroLikeBodies.push_back(MCAsmMacro(Name, Body, Parameters));
return &MacroLikeBodies.back();
}
void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS) {
OS << ".endr\n";
MemoryBuffer *Instantiation =
MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
MacroInstantiation *MI = new MacroInstantiation(
M, DirectiveLoc, CurBuffer, getTok().getLoc(), Instantiation);
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
Lex();
}
bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc) {
int64_t Count;
if (parseAbsoluteExpression(Count))
return TokError("unexpected token in '.rept' directive");
if (Count < 0)
return TokError("Count is negative");
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.rept' directive");
// Eat the end of statement.
Lex();
// Lex the rept definition.
MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
MCAsmMacroParameters Parameters;
MCAsmMacroArguments A;
raw_svector_ostream OS(Buf);
while (Count--) {
if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc()))
return true;
}
instantiateMacroLikeBody(M, DirectiveLoc, OS);
return false;
}
/// parseDirectiveIrp
/// ::= .irp symbol,values
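/// For example (illustrative):
///   .irp reg, a, b, c
///   push \reg
///   .endr
/// expands the body once per listed value, binding '\reg' each time.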
bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
MCAsmMacroParameters Parameters;
MCAsmMacroParameter Parameter;
if (parseIdentifier(Parameter.first))
return TokError("expected identifier in '.irp' directive");
Parameters.push_back(Parameter);
if (Lexer.isNot(AsmToken::Comma))
return TokError("expected comma in '.irp' directive");
Lex();
MCAsmMacroArguments A;
if (parseMacroArguments(0, A))
return true;
// Eat the end of statement.
Lex();
// Lex the irp definition.
MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
MCAsmMacroArguments Args;
Args.push_back(*i);
if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
return true;
}
instantiateMacroLikeBody(M, DirectiveLoc, OS);
return false;
}
/// parseDirectiveIrpc
/// ::= .irpc symbol,values
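/// For example (illustrative), '.irpc c, 123' expands the body once per
/// character, binding '\c' to '1', '2', then '3'.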
bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
MCAsmMacroParameters Parameters;
MCAsmMacroParameter Parameter;
if (parseIdentifier(Parameter.first))
return TokError("expected identifier in '.irpc' directive");
Parameters.push_back(Parameter);
if (Lexer.isNot(AsmToken::Comma))
return TokError("expected comma in '.irpc' directive");
Lex();
MCAsmMacroArguments A;
if (parseMacroArguments(0, A))
return true;
if (A.size() != 1 || A.front().size() != 1)
return TokError("unexpected token in '.irpc' directive");
// Eat the end of statement.
Lex();
// Lex the irpc definition.
MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
StringRef Values = A.front().front().getString();
std::size_t I, End = Values.size();
for (I = 0; I < End; ++I) {
MCAsmMacroArgument Arg;
Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I + 1)));
MCAsmMacroArguments Args;
Args.push_back(Arg);
if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
return true;
}
instantiateMacroLikeBody(M, DirectiveLoc, OS);
return false;
}
bool AsmParser::parseDirectiveEndr(SMLoc DirectiveLoc) {
if (ActiveMacros.empty())
return TokError("unmatched '.endr' directive");
// The only '.endr' directives that should get here are the ones created by
// instantiateMacroLikeBody.
assert(getLexer().is(AsmToken::EndOfStatement));
handleMacroExit();
return false;
}
bool AsmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
size_t Len) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
if (parseExpression(Value))
return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
if (!MCE)
return Error(ExprLoc, "unexpected expression in _emit");
uint64_t IntValue = MCE->getValue();
if (!isUIntN(8, IntValue) && !isIntN(8, IntValue))
return Error(ExprLoc, "literal value out of range for directive");
Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len));
return false;
}
bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
if (parseExpression(Value))
return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
if (!MCE)
return Error(ExprLoc, "unexpected expression in align");
uint64_t IntValue = MCE->getValue();
if (!isPowerOf2_64(IntValue))
return Error(ExprLoc, "literal value not a power of two greater then zero");
Info.AsmRewrites->push_back(
AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue)));
return false;
}
// We are comparing pointers, but the pointers are relative to a single string.
// Thus, this should always be deterministic.
static int rewritesSort(const AsmRewrite *AsmRewriteA,
const AsmRewrite *AsmRewriteB) {
if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
return -1;
if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
return 1;
// It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
// rewrite to the same location. Make sure the SizeDirective rewrite is
// performed first, then the Imm/ImmPrefix and finally the Input/Output. This
// ensures the sort algorithm is stable.
if (AsmRewritePrecedence[AsmRewriteA->Kind] >
AsmRewritePrecedence[AsmRewriteB->Kind])
return -1;
if (AsmRewritePrecedence[AsmRewriteA->Kind] <
AsmRewritePrecedence[AsmRewriteB->Kind])
return 1;
llvm_unreachable("Unstable rewrite sort.");
}
bool AsmParser::parseMSInlineAsm(
void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
SmallVector<void *, 4> InputDecls;
SmallVector<void *, 4> OutputDecls;
SmallVector<bool, 4> InputDeclsAddressOf;
SmallVector<bool, 4> OutputDeclsAddressOf;
SmallVector<std::string, 4> InputConstraints;
SmallVector<std::string, 4> OutputConstraints;
SmallVector<unsigned, 4> ClobberRegs;
SmallVector<AsmRewrite, 4> AsmStrRewrites;
// Prime the lexer.
Lex();
// While we have input, parse each statement.
unsigned InputIdx = 0;
unsigned OutputIdx = 0;
while (getLexer().isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);
if (parseStatement(Info))
return true;
if (Info.ParseError)
return true;
if (Info.Opcode == ~0U)
continue;
const MCInstrDesc &Desc = MII->get(Info.Opcode);
// Build the list of clobbers, outputs and inputs.
for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
// Immediate.
if (Operand->isImm())
continue;
// Register operand.
if (Operand->isReg() && !Operand->needAddressOf()) {
unsigned NumDefs = Desc.getNumDefs();
// Clobber.
if (NumDefs && Operand->getMCOperandNum() < NumDefs)
ClobberRegs.push_back(Operand->getReg());
continue;
}
// Expr/Input or Output.
StringRef SymName = Operand->getSymName();
if (SymName.empty())
continue;
void *OpDecl = Operand->getOpDecl();
if (!OpDecl)
continue;
bool isOutput = (i == 1) && Desc.mayStore();
SMLoc Start = SMLoc::getFromPointer(SymName.data());
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand->needAddressOf());
OutputConstraints.push_back('=' + Operand->getConstraint().str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
} else {
InputDecls.push_back(OpDecl);
InputDeclsAddressOf.push_back(Operand->needAddressOf());
InputConstraints.push_back(Operand->getConstraint().str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size()));
}
}
// Consider implicit defs to be clobbers. Think of cpuid and push.
const uint16_t *ImpDefs = Desc.getImplicitDefs();
for (unsigned I = 0, E = Desc.getNumImplicitDefs(); I != E; ++I)
ClobberRegs.push_back(ImpDefs[I]);
}
// Set the number of Outputs and Inputs.
NumOutputs = OutputDecls.size();
NumInputs = InputDecls.size();
// Set the unique clobbers.
array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
ClobberRegs.end());
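// Sort-then-unique: array_pod_sort orders the clobber registers so that
// std::unique can strip adjacent duplicates before the names are printed.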
Clobbers.assign(ClobberRegs.size(), std::string());
for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
raw_string_ostream OS(Clobbers[I]);
IP->printRegName(OS, ClobberRegs[I]);
}
// Merge the various outputs and inputs. Outputs are expected first.
if (NumOutputs || NumInputs) {
unsigned NumExprs = NumOutputs + NumInputs;
OpDecls.resize(NumExprs);
Constraints.resize(NumExprs);
for (unsigned i = 0; i < NumOutputs; ++i) {
OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
Constraints[i] = OutputConstraints[i];
}
for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
Constraints[j] = InputConstraints[i];
}
}
// Build the IR assembly string.
std::string AsmStringIR;
raw_string_ostream OS(AsmStringIR);
const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
E = AsmStrRewrites.end();
I != E; ++I) {
AsmRewriteKind Kind = (*I).Kind;
if (Kind == AOK_Delete)
continue;
const char *Loc = (*I).Loc.getPointer();
assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
// Emit everything up to the immediate/expression.
unsigned Len = Loc - AsmStart;
if (Len)
OS << StringRef(AsmStart, Len);
// Skip the original expression.
if (Kind == AOK_Skip) {
AsmStart = Loc + (*I).Len;
continue;
}
unsigned AdditionalSkip = 0;
// Rewrite expressions in $N notation.
switch (Kind) {
default:
break;
case AOK_Imm:
OS << "$$" << (*I).Val;
break;
case AOK_ImmPrefix:
OS << "$$";
break;
case AOK_Input:
OS << '$' << InputIdx++;
break;
case AOK_Output:
OS << '$' << OutputIdx++;
break;
case AOK_SizeDirective:
switch ((*I).Val) {
default: break;
case 8: OS << "byte ptr "; break;
case 16: OS << "word ptr "; break;
case 32: OS << "dword ptr "; break;
case 64: OS << "qword ptr "; break;
case 80: OS << "xword ptr "; break;
case 128: OS << "xmmword ptr "; break;
case 256: OS << "ymmword ptr "; break;
}
break;
case AOK_Emit:
OS << ".byte";
break;
case AOK_Align: {
unsigned Val = (*I).Val;
OS << ".align " << Val;
// Skip the original immediate.
assert(Val < 10 && "Expected alignment less than 2^10.");
AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
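// The original text was "align N" with N = 2^Val: the rewrite's Len of 5
// covers "align", and AdditionalSkip covers the space plus N's decimal
// digits (one digit for 1..8, two for 16..64, three for 128..512).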
break;
}
case AOK_DotOperator:
+ // Insert the dot if the user omitted it.
+ OS.flush();
+ if (AsmStringIR.at(AsmStringIR.size() - 1) != '.')
+ OS << '.';
OS << (*I).Val;
break;
}
// Skip the original expression.
AsmStart = Loc + (*I).Len + AdditionalSkip;
}
// Emit the remainder of the asm string.
if (AsmStart != AsmEnd)
OS << StringRef(AsmStart, AsmEnd - AsmStart);
AsmString = OS.str();
return false;
}
/// \brief Create an MCAsmParser instance.
MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C,
MCStreamer &Out, const MCAsmInfo &MAI) {
return new AsmParser(SM, C, Out, MAI);
}
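// Minimal usage sketch (illustrative only; Ctx, Streamer, MAI and the
// target-specific TargetParser are assumed to be set up elsewhere):
//
//   SourceMgr SrcMgr;                        // owns the asm source buffer
//   MCAsmParser *Parser = createMCAsmParser(SrcMgr, Ctx, Streamer, MAI);
//   Parser->setTargetParser(*TargetParser);  // e.g. the X86 asm parser
//   bool HadError = Parser->Run(/*NoInitialTextSection=*/false);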
Index: head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (revision 265925)
@@ -1,4599 +1,4595 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "aarch64-isel"
#include "AArch64.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
using namespace llvm;
static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
-
- if (Subtarget->isTargetLinux())
- return new AArch64LinuxTargetObjectFile();
- if (Subtarget->isTargetELF())
- return new TargetLoweringObjectFileELF();
- llvm_unreachable("unknown subtarget type");
+ assert (Subtarget->isTargetELF() && "unknown subtarget type");
+ return new AArch64ElfTargetObjectFile();
}
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// SIMD compares set the entire lane's bits to 1
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Scalar register <-> type mapping
addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget->hasNEON()) {
// And the vectors
addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
}
computeRegisterProperties();
// We combine OR nodes for bitfield and NEON BSL operations.
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// AArch64 does not have i1 loads, or much of anything for i1 really.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setStackPointerRegisterToSaveRestore(AArch64::XSP);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
// We'll lower globals to wrappers for selection.
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
// A64 instructions have the comparison predicate attached to the user of the
// result, but having a separate comparison is valuable for matching.
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
// Legal floating-point operations.
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FABS, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FNEG, MVT::f32, Legal);
setOperationAction(ISD::FNEG, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FSQRT, MVT::f32, Legal);
setOperationAction(ISD::FSQRT, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
// Illegal floating-point operations.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f32, Expand);
setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f32, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f32, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
setOperationAction(ISD::FLOG10, MVT::f32, Expand);
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOWI, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, Custom);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
// This prevents LLVM trying to compress double constants into a floating
// constant-pool entry and trying to load from there. It's of doubtful benefit
// for A64: we'd need LDR followed by FCVT, I believe.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setExceptionPointerRegister(AArch64::X0);
setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) {
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v1f32, Custom);
setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
}
}
EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
// It's reasonably important that this value matches the "natural" legal
// promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
// in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
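// For example, a compare of two v4f32 vectors yields a v4i32 mask, while any
// scalar compare yields an i32.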
if (!VT.isVector()) return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
unsigned &LdrOpc,
unsigned &StrOpc) {
static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
AArch64::LDXR_word, AArch64::LDXR_dword};
static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
AArch64::LDAXR_word, AArch64::LDAXR_dword};
static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
AArch64::STXR_word, AArch64::STXR_dword};
static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
AArch64::STLXR_word, AArch64::STLXR_dword};
const unsigned *LoadOps, *StoreOps;
if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
LoadOps = LoadAcqs;
else
LoadOps = LoadBares;
if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
StoreOps = StoreRels;
else
StoreOps = StoreBares;
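// E.g. a 4-byte operation with SequentiallyConsistent ordering selects
// LDAXR_word / STLXR_word: acquire semantics on the load, release on the
// store.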
assert(isPowerOf2_32(Size) && Size <= 8 &&
"unsupported size for atomic binary op!");
LdrOpc = LoadOps[Log2_32(Size)];
StrOpc = StoreOps[Log2_32(Size)];
}
// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really
// have value type mapped, and they are both being defined as MVT::untyped.
// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost
// would fail to figure out the register pressure correctly.
std::pair<const TargetRegisterClass*, uint8_t>
AArch64TargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::v4i64:
RRC = &AArch64::QPairRegClass;
Cost = 2;
break;
case MVT::v8i64:
RRC = &AArch64::QQuadRegClass;
Cost = 4;
break;
}
return std::make_pair(RRC, Cost);
}
MachineBasicBlock *
AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size,
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
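// For reference, this expands IR such as
//   %old = atomicrmw add i32* %p, i32 %v seq_cst
// into the ldxr / <binop> / stxr retry loop constructed below.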
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned ldrOpc, strOpc;
getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC
= Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// ldxr dest, ptr
// <binop> scratch, dest, incr
// stxr stxr_status, scratch, ptr
// cbnz stxr_status, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
if (BinOpcode) {
// All arithmetic operations we'll be creating are designed to take an extra
// shift or extend operand, which we can conveniently set to zero.
// Operand order needs to go the other way for NAND.
if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
BuildMI(BB, dl, TII->get(BinOpcode), scratch)
.addReg(incr).addReg(dest).addImm(0);
else
BuildMI(BB, dl, TII->get(BinOpcode), scratch)
.addReg(dest).addReg(incr).addImm(0);
}
// From the stxr, the register is GPR32; from the cmp it's GPR32wsp
unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
BuildMI(BB, dl, TII->get(AArch64::CBNZw))
.addReg(stxr_status).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
unsigned CmpOp,
A64CC::CondCodes Cond) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
unsigned oldval = dest;
DebugLoc dl = MI->getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const TargetRegisterClass *TRC, *TRCsp;
if (Size == 8) {
TRC = &AArch64::GPR64RegClass;
TRCsp = &AArch64::GPR64xspRegClass;
} else {
TRC = &AArch64::GPR32RegClass;
TRCsp = &AArch64::GPR32wspRegClass;
}
unsigned ldrOpc, strOpc;
getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
unsigned scratch = MRI.createVirtualRegister(TRC);
MRI.constrainRegClass(scratch, TRCsp);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// ldxr dest, ptr
// cmp incr, dest (, sign extend if necessary)
// csel scratch, dest, incr, cond
// stxr stxr_status, scratch, ptr
// cbnz stxr_status, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
// Build compare and cmov instructions.
MRI.constrainRegClass(incr, TRCsp);
BuildMI(BB, dl, TII->get(CmpOp))
.addReg(incr).addReg(oldval).addImm(0);
BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
scratch)
.addReg(oldval).addReg(incr).addImm(Cond);
unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status)
.addReg(scratch).addReg(ptr);
BuildMI(BB, dl, TII->get(AArch64::CBNZw))
.addReg(stxr_status).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
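// Expands IR of the form
//   %old = cmpxchg i32* %p, i32 %expected, i32 %new seq_cst
// into the two-block compare / store-exclusive retry sequence built below.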
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const TargetRegisterClass *TRCsp;
TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
unsigned ldrOpc, strOpc;
getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It; // insert the new blocks after the current block
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
// fallthrough --> loop1MBB
BB->addSuccessor(loop1MBB);
// loop1MBB:
// ldxr dest, [ptr]
// cmp dest, oldval
// b.ne exitMBB
BB = loop1MBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
MRI.constrainRegClass(dest, TRCsp);
BuildMI(BB, dl, TII->get(CmpOp))
.addReg(dest).addReg(oldval).addImm(0);
BuildMI(BB, dl, TII->get(AArch64::Bcc))
.addImm(A64CC::NE).addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
// loop2MBB:
// stxr stxr_status, newval, [ptr]
// cbnz stxr_status, loop1MBB
BB = loop2MBB;
unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
BuildMI(BB, dl, TII->get(AArch64::CBNZw))
.addReg(stxr_status).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction using conditional branches
// and loads, giving an instruction sequence like:
// str q0, [sp]
// b.ne IfTrue
// b Finish
// IfTrue:
// str q1, [sp]
// Finish:
// ldr q0, [sp]
//
// Using virtual registers would probably not be beneficial since COPY
// instructions are expensive for f128 (there's no actual instruction to
// implement them).
//
// An alternative would be to do an integer-CSEL on some address. E.g.:
// mov x0, sp
// add x1, sp, #16
// str q0, [x0]
// str q1, [x1]
// csel x0, x0, x1, ne
// ldr q0, [x0]
//
// It's unclear which approach is actually optimal.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineFunction *MF = MBB->getParent();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
MachineFunction::iterator It = MBB;
++It;
unsigned DestReg = MI->getOperand(0).getReg();
unsigned IfTrueReg = MI->getOperand(1).getReg();
unsigned IfFalseReg = MI->getOperand(2).getReg();
unsigned CondCode = MI->getOperand(3).getImm();
bool NZCVKilled = MI->getOperand(4).isKill();
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, TrueBB);
MF->insert(It, EndBB);
// Transfer rest of current basic-block to EndBB
EndBB->splice(EndBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
// We need somewhere on the stack to store the selected f128 value.
int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
// [... start of incoming MBB ...]
// str qIFFALSE, [sp]
// b.cc IfTrue
// b Done
BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
.addReg(IfFalseReg)
.addFrameIndex(ScratchFI)
.addImm(0);
BuildMI(MBB, DL, TII->get(AArch64::Bcc))
.addImm(CondCode)
.addMBB(TrueBB);
BuildMI(MBB, DL, TII->get(AArch64::Bimm))
.addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
if (!NZCVKilled) {
// NZCV is live-through TrueBB.
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
// IfTrue:
// str qIFTRUE, [sp]
BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
.addReg(IfTrueReg)
.addFrameIndex(ScratchFI)
.addImm(0);
// Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
// blocks.
TrueBB->addSuccessor(EndBB);
// Done:
// ldr qDEST, [sp]
// [... rest of incoming MBB ...]
MachineInstr *StartOfEnd = EndBB->begin();
BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
.addFrameIndex(ScratchFI)
.addImm(0);
MI->eraseFromParent();
return EndBB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
default: llvm_unreachable("Unhandled instruction with custom inserter");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, MBB);
case AArch64::ATOMIC_LOAD_ADD_I8:
return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
case AArch64::ATOMIC_LOAD_ADD_I16:
return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
case AArch64::ATOMIC_LOAD_ADD_I32:
return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
case AArch64::ATOMIC_LOAD_ADD_I64:
return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
case AArch64::ATOMIC_LOAD_SUB_I8:
return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
case AArch64::ATOMIC_LOAD_SUB_I16:
return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
case AArch64::ATOMIC_LOAD_SUB_I32:
return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
case AArch64::ATOMIC_LOAD_SUB_I64:
return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
case AArch64::ATOMIC_LOAD_AND_I8:
return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
case AArch64::ATOMIC_LOAD_AND_I16:
return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
case AArch64::ATOMIC_LOAD_AND_I32:
return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
case AArch64::ATOMIC_LOAD_AND_I64:
return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
case AArch64::ATOMIC_LOAD_OR_I8:
return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
case AArch64::ATOMIC_LOAD_OR_I16:
return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
case AArch64::ATOMIC_LOAD_OR_I32:
return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
case AArch64::ATOMIC_LOAD_OR_I64:
return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
case AArch64::ATOMIC_LOAD_XOR_I8:
return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
case AArch64::ATOMIC_LOAD_XOR_I16:
return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
case AArch64::ATOMIC_LOAD_XOR_I32:
return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
case AArch64::ATOMIC_LOAD_XOR_I64:
return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
case AArch64::ATOMIC_LOAD_NAND_I8:
return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
case AArch64::ATOMIC_LOAD_NAND_I16:
return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
case AArch64::ATOMIC_LOAD_NAND_I32:
return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
case AArch64::ATOMIC_LOAD_NAND_I64:
return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
case AArch64::ATOMIC_LOAD_MIN_I8:
return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
case AArch64::ATOMIC_LOAD_MIN_I16:
return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
case AArch64::ATOMIC_LOAD_MIN_I32:
return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
case AArch64::ATOMIC_LOAD_MIN_I64:
return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
case AArch64::ATOMIC_LOAD_MAX_I8:
return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
case AArch64::ATOMIC_LOAD_MAX_I16:
return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
case AArch64::ATOMIC_LOAD_MAX_I32:
return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
case AArch64::ATOMIC_LOAD_MAX_I64:
return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
case AArch64::ATOMIC_LOAD_UMIN_I8:
return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
case AArch64::ATOMIC_LOAD_UMIN_I16:
return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
case AArch64::ATOMIC_LOAD_UMIN_I32:
return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
case AArch64::ATOMIC_LOAD_UMIN_I64:
return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
case AArch64::ATOMIC_LOAD_UMAX_I8:
return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
case AArch64::ATOMIC_LOAD_UMAX_I16:
return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
case AArch64::ATOMIC_LOAD_UMAX_I32:
return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
case AArch64::ATOMIC_LOAD_UMAX_I64:
return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
case AArch64::ATOMIC_SWAP_I8:
return emitAtomicBinary(MI, MBB, 1, 0);
case AArch64::ATOMIC_SWAP_I16:
return emitAtomicBinary(MI, MBB, 2, 0);
case AArch64::ATOMIC_SWAP_I32:
return emitAtomicBinary(MI, MBB, 4, 0);
case AArch64::ATOMIC_SWAP_I64:
return emitAtomicBinary(MI, MBB, 8, 0);
case AArch64::ATOMIC_CMP_SWAP_I8:
return emitAtomicCmpSwap(MI, MBB, 1);
case AArch64::ATOMIC_CMP_SWAP_I16:
return emitAtomicCmpSwap(MI, MBB, 2);
case AArch64::ATOMIC_CMP_SWAP_I32:
return emitAtomicCmpSwap(MI, MBB, 4);
case AArch64::ATOMIC_CMP_SWAP_I64:
return emitAtomicCmpSwap(MI, MBB, 8);
}
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
case AArch64ISD::Call: return "AArch64ISD::Call";
case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
case AArch64ISD::BFI: return "AArch64ISD::BFI";
case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
case AArch64ISD::Ret: return "AArch64ISD::Ret";
case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
case AArch64ISD::NEON_BSL:
return "AArch64ISD::NEON_BSL";
case AArch64ISD::NEON_MOVIMM:
return "AArch64ISD::NEON_MOVIMM";
case AArch64ISD::NEON_MVNIMM:
return "AArch64ISD::NEON_MVNIMM";
case AArch64ISD::NEON_FMOVIMM:
return "AArch64ISD::NEON_FMOVIMM";
case AArch64ISD::NEON_CMP:
return "AArch64ISD::NEON_CMP";
case AArch64ISD::NEON_CMPZ:
return "AArch64ISD::NEON_CMPZ";
case AArch64ISD::NEON_TST:
return "AArch64ISD::NEON_TST";
case AArch64ISD::NEON_QSHLs:
return "AArch64ISD::NEON_QSHLs";
case AArch64ISD::NEON_QSHLu:
return "AArch64ISD::NEON_QSHLu";
case AArch64ISD::NEON_VDUP:
return "AArch64ISD::NEON_VDUP";
case AArch64ISD::NEON_VDUPLANE:
return "AArch64ISD::NEON_VDUPLANE";
case AArch64ISD::NEON_REV16:
return "AArch64ISD::NEON_REV16";
case AArch64ISD::NEON_REV32:
return "AArch64ISD::NEON_REV32";
case AArch64ISD::NEON_REV64:
return "AArch64ISD::NEON_REV64";
case AArch64ISD::NEON_UZP1:
return "AArch64ISD::NEON_UZP1";
case AArch64ISD::NEON_UZP2:
return "AArch64ISD::NEON_UZP2";
case AArch64ISD::NEON_ZIP1:
return "AArch64ISD::NEON_ZIP1";
case AArch64ISD::NEON_ZIP2:
return "AArch64ISD::NEON_ZIP2";
case AArch64ISD::NEON_TRN1:
return "AArch64ISD::NEON_TRN1";
case AArch64ISD::NEON_TRN2:
return "AArch64ISD::NEON_TRN2";
case AArch64ISD::NEON_LD1_UPD:
return "AArch64ISD::NEON_LD1_UPD";
case AArch64ISD::NEON_LD2_UPD:
return "AArch64ISD::NEON_LD2_UPD";
case AArch64ISD::NEON_LD3_UPD:
return "AArch64ISD::NEON_LD3_UPD";
case AArch64ISD::NEON_LD4_UPD:
return "AArch64ISD::NEON_LD4_UPD";
case AArch64ISD::NEON_ST1_UPD:
return "AArch64ISD::NEON_ST1_UPD";
case AArch64ISD::NEON_ST2_UPD:
return "AArch64ISD::NEON_ST2_UPD";
case AArch64ISD::NEON_ST3_UPD:
return "AArch64ISD::NEON_ST3_UPD";
case AArch64ISD::NEON_ST4_UPD:
return "AArch64ISD::NEON_ST4_UPD";
case AArch64ISD::NEON_LD1x2_UPD:
return "AArch64ISD::NEON_LD1x2_UPD";
case AArch64ISD::NEON_LD1x3_UPD:
return "AArch64ISD::NEON_LD1x3_UPD";
case AArch64ISD::NEON_LD1x4_UPD:
return "AArch64ISD::NEON_LD1x4_UPD";
case AArch64ISD::NEON_ST1x2_UPD:
return "AArch64ISD::NEON_ST1x2_UPD";
case AArch64ISD::NEON_ST1x3_UPD:
return "AArch64ISD::NEON_ST1x3_UPD";
case AArch64ISD::NEON_ST1x4_UPD:
return "AArch64ISD::NEON_ST1x4_UPD";
case AArch64ISD::NEON_LD2DUP:
return "AArch64ISD::NEON_LD2DUP";
case AArch64ISD::NEON_LD3DUP:
return "AArch64ISD::NEON_LD3DUP";
case AArch64ISD::NEON_LD4DUP:
return "AArch64ISD::NEON_LD4DUP";
case AArch64ISD::NEON_LD2DUP_UPD:
return "AArch64ISD::NEON_LD2DUP_UPD";
case AArch64ISD::NEON_LD3DUP_UPD:
return "AArch64ISD::NEON_LD3DUP_UPD";
case AArch64ISD::NEON_LD4DUP_UPD:
return "AArch64ISD::NEON_LD4DUP_UPD";
case AArch64ISD::NEON_LD2LN_UPD:
return "AArch64ISD::NEON_LD2LN_UPD";
case AArch64ISD::NEON_LD3LN_UPD:
return "AArch64ISD::NEON_LD3LN_UPD";
case AArch64ISD::NEON_LD4LN_UPD:
return "AArch64ISD::NEON_LD4LN_UPD";
case AArch64ISD::NEON_ST2LN_UPD:
return "AArch64ISD::NEON_ST2LN_UPD";
case AArch64ISD::NEON_ST3LN_UPD:
return "AArch64ISD::NEON_ST3LN_UPD";
case AArch64ISD::NEON_ST4LN_UPD:
return "AArch64ISD::NEON_ST4LN_UPD";
case AArch64ISD::NEON_VEXTRACT:
return "AArch64ISD::NEON_VEXTRACT";
default:
return NULL;
}
}
static const uint16_t AArch64FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
};
static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
static const uint16_t AArch64ArgRegs[] = {
AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
};
static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
// Mark all remaining general purpose registers as allocated. We don't
// backtrack: if (for example) an i128 gets put on the stack, no subsequent
// i64 will go in registers (C.11).
for (unsigned i = 0; i < NumArgRegs; ++i)
State.AllocateReg(AArch64ArgRegs[i]);
return false;
}
#include "AArch64GenCallingConv.inc"
CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
switch(CC) {
default: llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
case CallingConv::C:
return CC_A64_APCS;
}
}
void
AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc DL, SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
AArch64MachineFunctionInfo *FuncInfo
= MF.getInfo<AArch64MachineFunctionInfo>();
SmallVector<SDValue, 8> MemOps;
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
NumArgRegs);
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
NumFPRArgRegs);
unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 8),
false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
DAG.getConstant(8, getPointerTy()));
}
}
if (getSubtarget()->hasFPARMv8()) {
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
// According to the AArch64 Procedure Call Standard, section B.1/B.3, we
// can omit a register save area if we know we'll never use registers of
// that class.
if (FPRSaveSize != 0) {
FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
&AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 16),
false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
DAG.getConstant(16, getPointerTy()));
}
}
FuncInfo->setVariadicFPRIdx(FPRIdx);
FuncInfo->setVariadicFPRSize(FPRSaveSize);
}
int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
FuncInfo->setVariadicStackIdx(StackIdx);
FuncInfo->setVariadicGPRIdx(GPRIdx);
FuncInfo->setVariadicGPRSize(GPRSaveSize);
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
MemOps.size());
}
}
SDValue
AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
AArch64MachineFunctionInfo *FuncInfo
= MF.getInfo<AArch64MachineFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
SmallVector<SDValue, 16> ArgValues;
SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Flags.isByVal()) {
// Byval is used for small structs and HFAs in the PCS, but the system
// should work in a non-compliant manner for larger structs.
EVT PtrTy = getPointerTy();
int Size = Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
VA.getLocMemOffset(),
false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
InVals.push_back(FrameIdxN);
continue;
} else if (VA.isRegLoc()) {
MVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC = getRegClassFor(RegVT);
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
} else { // VA.isRegLoc()
assert(VA.isMemLoc());
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
}
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
break;
case CCValAssign::SExt:
case CCValAssign::ZExt:
case CCValAssign::AExt: {
unsigned DestSize = VA.getValVT().getSizeInBits();
unsigned DestSubReg;
switch (DestSize) {
case 8: DestSubReg = AArch64::sub_8; break;
case 16: DestSubReg = AArch64::sub_16; break;
case 32: DestSubReg = AArch64::sub_32; break;
case 64: DestSubReg = AArch64::sub_64; break;
default: llvm_unreachable("Unexpected argument promotion");
}
ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
VA.getValVT(), ArgValue,
DAG.getTargetConstant(DestSubReg, MVT::i32)),
0);
break;
}
}
InVals.push_back(ArgValue);
}
if (isVarArg)
SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
unsigned StackArgSize = CCInfo.getNextStackOffset();
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
// use of the stack area to be popped, which must be aligned to 16 bytes in
// any case:
StackArgSize = RoundUpToAlignment(StackArgSize, 16);
// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
// a multiple of 16.
FuncInfo->setArgumentStackToRestore(StackArgSize);
// This realignment carries over to the available bytes below. Our own
// callers will guarantee the space is free by giving an aligned value to
// CALLSEQ_START.
}
// Even if we're not expected to free up the space, it's useful to know how
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
return Chain;
}
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
// PCS: "If the type, T, of the result of a function is such that
// void func(T arg) would require that arg be passed as a value in a
// register (or set of registers) according to the rules in 5.4, then the
// result is returned in the same registers as would be used for such an
// argument.
//
// Otherwise, the caller shall reserve a block of memory of sufficient
// size and alignment to hold the result. The address of the memory block
// shall be passed as an additional argument to the function in x8."
//
// This is implemented in two places. The register-return values are dealt
// with here, more complex returns are passed as an sret parameter, which
// means we don't have to worry about it during actual return.
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
SDValue Arg = OutVals[i];
// There's no convenient note in the ABI about this as there is for normal
// arguments, but it says return values are passed in the same registers as
// an argument would be. I believe that includes the comments about
// unspecified higher bits, putting the burden of widening on the *caller*
// for return values.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info");
case CCValAssign::Full: break;
case CCValAssign::SExt:
case CCValAssign::ZExt:
case CCValAssign::AExt:
// Floating-point values should only be extended when they're going into
// memory, which can't happen here so an integer extend is acceptable.
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
&RetOps[0], RetOps.size());
}
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
AArch64MachineFunctionInfo *FuncInfo
= MF.getInfo<AArch64MachineFunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
bool IsSibCall = false;
if (IsTailCall) {
IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall)
IsSibCall = true;
}
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
// On AArch64 (and all other architectures I'm aware of) the most this has to
// do is adjust the stack pointer.
unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int FPDiff = 0;
if (IsTailCall && !IsSibCall) {
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
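// Worked example: with 32 bytes of reusable incoming argument space and a
// tail call needing 48 bytes of arguments, FPDiff = 32 - 48 = -16.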
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
// a 16-byte aligned SP and the delta applied for the tail call should
// satisfy the same constraint.
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
dl);
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
getPointerTy());
SmallVector<SDValue, 8> MemOpChains;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
SDValue Arg = OutVals[i];
// Callee does the actual widening, so all extensions just use an implicit
// definition of the rest of the Loc. Aesthetically, this would be nicer as
// an ANY_EXTEND, but that isn't valid for floating-point types and this
// alternative works on integer types too.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
case CCValAssign::ZExt:
case CCValAssign::AExt: {
unsigned SrcSize = VA.getValVT().getSizeInBits();
unsigned SrcSubReg;
switch (SrcSize) {
case 8: SrcSubReg = AArch64::sub_8; break;
case 16: SrcSubReg = AArch64::sub_16; break;
case 32: SrcSubReg = AArch64::sub_32; break;
case 64: SrcSubReg = AArch64::sub_64; break;
default: llvm_unreachable("Unexpected argument promotion");
}
Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
VA.getLocVT(),
DAG.getUNDEF(VA.getLocVT()),
Arg,
DAG.getTargetConstant(SrcSubReg, MVT::i32)),
0);
break;
}
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
if (VA.isRegLoc()) {
// A normal register (sub-) argument. For now we just note it down because
// we want to copy things into registers as late as possible to avoid
// register-pressure (and possibly worse).
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
assert(VA.isMemLoc() && "unexpected argument location");
SDValue DstAddr;
MachinePointerInfo DstInfo;
if (IsTailCall) {
uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
VA.getLocVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
int32_t Offset = VA.getLocMemOffset() + FPDiff;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, getPointerTy());
DstInfo = MachinePointerInfo::getFixedStack(FI);
// Make sure any stack arguments overlapping with where we're storing are
// loaded before this eventual operation. Otherwise they'll be clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
}
if (Flags.isByVal()) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
Flags.getByValAlign(),
/*isVolatile = */ false,
/*alwaysInline = */ false,
DstInfo, MachinePointerInfo(0));
MemOpChains.push_back(Cpy);
} else {
// Normal stack argument, put it where it's needed.
SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
false, false, 0);
MemOpChains.push_back(Store);
}
}
// The loads and stores generated above shouldn't clash with each
// other. Combining them with this TokenFactor notes that fact for the rest of
// the backend.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Most of the rest of the instructions need to be glued together; we don't
// want assignments to actual registers used by a call to be rearranged by a
// well-meaning scheduler.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// The linker is responsible for inserting veneers when necessary to put a
// function call destination in range, so we don't need to bother with a
// wrapper here.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
// We produce the following DAG scheme for the actual call instruction:
// (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
//
// Most arguments aren't going to be used and just keep the values live as
// far as LLVM is concerned. It's expected to be selected as simply "bl
// callee" (for a direct, non-tail call).
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
}
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers. This
// is used later in codegen to constrain register-allocation.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
// If we needed glue, put it in as the last argument.
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (IsTailCall) {
return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Now we can reclaim the stack; we may as well do it before working out where
// our return value is.
if (!IsSibCall) {
uint64_t CalleePopBytes
= DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(CalleePopBytes, true),
InFlag, dl);
InFlag = Chain.getValue(1);
}
return LowerCallResult(Chain, InFlag, CallConv,
IsVarArg, Ins, dl, DAG, InVals);
}
SDValue
AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Return values that are too big to fit into registers should use an sret
// pointer, so this can be a lot simpler than the main argument code.
assert(VA.isRegLoc() && "Memory locations not expected for call return");
SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
break;
case CCValAssign::ZExt:
case CCValAssign::SExt:
case CCValAssign::AExt:
// Floating-point arguments only get extended/truncated if they're going
// in memory, so using the integer operation is acceptable here.
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
}
InVals.push_back(Val);
}
return Chain;
}
bool
AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool IsVarArg,
bool IsCalleeStructRet,
bool IsCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
// For CallingConv::C this function knows whether the ABI needs
// changing. That's not true for other conventions so they will have to opt in
// manually.
if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
return false;
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF->arg_begin(),
e = CallerF->arg_end(); i != e; ++i)
if (i->hasByValAttr())
return false;
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
}
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
assert((!IsVarArg || CalleeCC == CallingConv::C)
&& "Unexpected variadic calling convention");
if (IsVarArg && !Outs.empty()) {
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
}
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
if (RVLocs1.size() != RVLocs2.size())
return false;
for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
return false;
if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
return false;
if (RVLocs1[i].isRegLoc()) {
if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
return false;
} else {
if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
return false;
}
}
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
const AArch64MachineFunctionInfo *FuncInfo
= MF.getInfo<AArch64MachineFunctionInfo>();
// If the stack arguments for this call would fit into our own save area then
// the call can be made tail.
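// Sketch with made-up numbers: a caller that itself received 32 bytes of
// stack arguments can tail-call anything needing up to 32 bytes of outgoing
// stack arguments; a callee needing 40 bytes would force a normal call.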
return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
}
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return CallCC == CallingConv::Fast && TailCallOpt;
}
bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
return CallCC == CallingConv::Fast;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo *MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack argument load that overlaps the
// clobbered range.
for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
&ArgChains[0], ArgChains.size());
}
static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
switch (CC) {
case ISD::SETEQ: return A64CC::EQ;
case ISD::SETGT: return A64CC::GT;
case ISD::SETGE: return A64CC::GE;
case ISD::SETLT: return A64CC::LT;
case ISD::SETLE: return A64CC::LE;
case ISD::SETNE: return A64CC::NE;
case ISD::SETUGT: return A64CC::HI;
case ISD::SETUGE: return A64CC::HS;
case ISD::SETULT: return A64CC::LO;
case ISD::SETULE: return A64CC::LS;
default: llvm_unreachable("Unexpected condition code");
}
}
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
// icmp is implemented using adds/subs immediate, which take an unsigned
// 12-bit immediate, optionally shifted left by 12 bits.
// Negative values are handled symmetrically by using adds/subs.
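// For example, 0xfff and 0xfff000 are legal immediates, while 0x1001 is
// not: it has set bits both below and above the 12-bit boundary.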
if (Val < 0)
Val = -Val;
return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
}
SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue &A64cc,
SelectionDAG &DAG, SDLoc &dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
int64_t C = 0;
EVT VT = RHSC->getValueType(0);
bool knownInvalid = false;
// I'm not convinced the rest of LLVM handles these edge cases properly, but
// we can at least get it right.
if (isSignedIntSetCC(CC)) {
C = RHSC->getSExtValue();
} else if (RHSC->getZExtValue() > INT64_MAX) {
// A 64-bit constant not representable by a signed 64-bit integer is far
// too big to fit into a SUBS immediate anyway.
knownInvalid = true;
} else {
C = RHSC->getZExtValue();
}
if (!knownInvalid && !isLegalICmpImmediate(C)) {
// The constant doesn't fit, so try adjusting it by one.
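// For instance (a sketch): SETLT with C == 0xfff001 is unencodable, but the
// equivalent SETLE with C-1 == 0xfff000 fits (0xfff shifted left by 12).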
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
if (isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if (isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if (isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if (isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, VT);
}
break;
}
}
}
A64CC::CondCodes CondCode = IntCCToA64CC(CC);
A64cc = DAG.getConstant(CondCode, MVT::i32);
return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
DAG.getCondCode(CC));
}
static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
A64CC::CondCodes &Alternative) {
A64CC::CondCodes CondCode = A64CC::Invalid;
Alternative = A64CC::Invalid;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ: CondCode = A64CC::EQ; break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = A64CC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = A64CC::GE; break;
case ISD::SETOLT: CondCode = A64CC::MI; break;
case ISD::SETOLE: CondCode = A64CC::LS; break;
case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
case ISD::SETO: CondCode = A64CC::VC; break;
case ISD::SETUO: CondCode = A64CC::VS; break;
case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
case ISD::SETUGT: CondCode = A64CC::HI; break;
case ISD::SETUGE: CondCode = A64CC::PL; break;
case ISD::SETLT:
case ISD::SETULT: CondCode = A64CC::LT; break;
case ISD::SETLE:
case ISD::SETULE: CondCode = A64CC::LE; break;
case ISD::SETNE:
case ISD::SETUNE: CondCode = A64CC::NE; break;
}
return CondCode;
}
SDValue
AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT PtrVT = getPointerTy();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
switch (getTargetMachine().getCodeModel()) {
case CodeModel::Small:
// The most efficient code is PC-relative anyway for the small memory model,
// so we don't need to worry about relocation model.
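// i.e. roughly this assembly (sketch):
//   adrp x0, .Lbb
//   add  x0, x0, #:lo12:.Lbb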
return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
DAG.getTargetBlockAddress(BA, PtrVT, 0,
AArch64II::MO_NO_FLAG),
DAG.getTargetBlockAddress(BA, PtrVT, 0,
AArch64II::MO_LO12),
DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
case CodeModel::Large:
return DAG.getNode(
AArch64ISD::WrapperLarge, DL, PtrVT,
DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
default:
llvm_unreachable("Only small and large code models supported now");
}
}
// (BRCOND chain, val, dest)
SDValue
AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDValue TheBit = Op.getOperand(1);
SDValue DestBB = Op.getOperand(2);
// AArch64 BooleanContents is the default UndefinedBooleanContent, which means
// that as the consumer we are responsible for ignoring rubbish in higher
// bits.
TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
DAG.getConstant(1, MVT::i32));
SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
DAG.getConstant(0, TheBit.getValueType()),
DAG.getCondCode(ISD::SETNE));
return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
DestBB);
}
// (BR_CC chain, condcode, lhs, rhs, dest)
SDValue
AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue DestBB = Op.getOperand(4);
if (LHS.getValueType() == MVT::f128) {
// f128 comparisons are lowered to runtime calls by a routine which sets
// LHS, RHS and CC appropriately for the rest of this function to continue.
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (RHS.getNode() == 0) {
RHS = DAG.getConstant(0, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType().isInteger()) {
SDValue A64cc;
// Integers are handled in a separate function because the combinations of
// immediates and tests can get hairy and we may want to fiddle things.
SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
Chain, CmpOp, A64cc, DestBB);
}
// Note that some LLVM floating-point CondCodes can't be lowered to a single
// conditional branch, hence FPCCToA64CC can set a second test, where either
// passing is sufficient.
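// For example (sketch of the eventual assembly), an ONE comparison ends up
// as "b.mi dest" followed by "b.gt dest": either condition passing takes
// the branch.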
A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
CondCode = FPCCToA64CC(CC, Alternative);
SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
DAG.getCondCode(CC));
SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
Chain, SetCC, A64cc, DestBB);
if (Alternative != A64CC::Invalid) {
A64cc = DAG.getConstant(Alternative, MVT::i32);
A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
A64BR_CC, SetCC, A64cc, DestBB);
}
return A64BR_CC;
}
SDValue
AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
ArgListTy Args;
ArgListEntry Entry;
for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
EVT ArgVT = Op.getOperand(i).getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
// By default, the input chain to this libcall is the entry node of the
// function. If the libcall is going to be emitted as a tail call then
// isUsedByReturnOnly will change it to the right chain if the return
// node which is being folded has a non-entry input chain.
SDValue InChain = DAG.getEntryNode();
// isTailCall may be true since the callee does not reference the caller's
// stack frame. Check if it's in the right position.
SDValue TCChain = InChain;
bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
if (isTailCall)
InChain = TCChain;
TargetLowering::
CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
0, getLibcallCallingConv(Call), isTailCall,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, SDLoc(Op));
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
return CallInfo.first;
}
SDValue
AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
RTLIB::Libcall LC;
LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
/*isSigned*/ false, SDLoc(Op)).first;
}
SDValue
AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
RTLIB::Libcall LC;
LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
return LowerF128ToCall(Op, DAG, LC);
}
SDValue
AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
bool IsSigned) const {
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
RTLIB::Libcall LC;
if (IsSigned)
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
return LowerF128ToCall(Op, DAG, LC);
}
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, MVT::i64);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Return X30, which contains the return address. Mark it an implicit live-in.
unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
}
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = AArch64::X29;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
false, false, false, 0);
return FrameAddr;
}
SDValue
AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
SelectionDAG &DAG) const {
assert(getTargetMachine().getCodeModel() == CodeModel::Large);
assert(getTargetMachine().getRelocationModel() == Reloc::Static);
EVT PtrVT = getPointerTy();
SDLoc dl(Op);
const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
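// The wrapper below is materialized as a MOVZ/MOVK chain, roughly this
// assembly (sketch):
//   movz x0, #:abs_g3:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g0_nc:sym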
SDValue GlobalAddr = DAG.getNode(
AArch64ISD::WrapperLarge, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
DAG.getConstant(GN->getOffset(), PtrVT));
return GlobalAddr;
}
SDValue
AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
SelectionDAG &DAG) const {
assert(getTargetMachine().getCodeModel() == CodeModel::Small);
EVT PtrVT = getPointerTy();
SDLoc dl(Op);
const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned Alignment = GV->getAlignment();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
// Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
// to zero when they remain undefined. In PIC mode the GOT can take care of
// this, but in absolute mode we use a constant pool load.
SDValue PoolAddr;
PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
AArch64II::MO_NO_FLAG),
DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
AArch64II::MO_LO12),
DAG.getConstant(8, MVT::i32));
SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
MachinePointerInfo::getConstantPool(),
/*isVolatile=*/ false,
/*isNonTemporal=*/ true,
/*isInvariant=*/ true, 8);
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
DAG.getConstant(GN->getOffset(), PtrVT));
return GlobalAddr;
}
if (Alignment == 0) {
const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
if (GVPtrTy->getElementType()->isSized()) {
Alignment
= getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
} else {
// Be conservative if we can't guess, not that it really matters:
// functions and labels aren't valid for loads, and the methods used to
// actually calculate an address work with any alignment.
Alignment = 1;
}
}
unsigned char HiFixup, LoFixup;
bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
if (UseGOT) {
HiFixup = AArch64II::MO_GOT;
LoFixup = AArch64II::MO_GOT_LO12;
Alignment = 8;
} else {
HiFixup = AArch64II::MO_NO_FLAG;
LoFixup = AArch64II::MO_LO12;
}
// AArch64's small model demands the following sequence:
// ADRP x0, somewhere
// ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
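// whereas the GOT-indirect form loads through the GOT entry instead (sketch):
// ADRP x0, :got:somewhere
// LDR  x0, [x0, #:got_lo12:somewhere]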
SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
HiFixup),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
LoFixup),
DAG.getConstant(Alignment, MVT::i32));
if (UseGOT) {
GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
GlobalRef);
}
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
DAG.getConstant(GN->getOffset(), PtrVT));
return GlobalRef;
}
SDValue
AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
// TableGen doesn't have easy access to the CodeModel or RelocationModel, so
// we make those distinctions here.
switch (getTargetMachine().getCodeModel()) {
case CodeModel::Small:
return LowerGlobalAddressELFSmall(Op, DAG);
case CodeModel::Large:
return LowerGlobalAddressELFLarge(Op, DAG);
default:
llvm_unreachable("Only small and large code models supported now");
}
}
SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
SDValue DescAddr,
SDLoc DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
// The function we need to call is simply the first entry in the GOT for this
// descriptor, load it in preparation.
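// The overall sequence this builds is roughly (assembly sketch):
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, #:tlsdesc_lo12:var]
//   add  x0, x0, #:tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1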
SDValue Func, Chain;
Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
DescAddr);
// The function takes only one argument: the address of the descriptor itself
// in X0.
SDValue Glue;
Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
Glue = Chain.getValue(1);
// Finally, there's a special calling-convention which means that the lookup
// must preserve all registers (except X0, obviously).
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const AArch64RegisterInfo *A64RI
= static_cast<const AArch64RegisterInfo *>(TRI);
const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
// We're now ready to populate the argument list, as with a normal call:
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Func);
Ops.push_back(SymAddr);
Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(Glue);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
Ops.size());
Glue = Chain.getValue(1);
// After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
// back to the generic handling code.
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(getSubtarget()->isTargetELF() &&
"TLS not implemented for non-ELF targets");
assert(getTargetMachine().getCodeModel() == CodeModel::Small
&& "TLS only supported in small memory model");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
SDValue TPOff;
EVT PtrVT = getPointerTy();
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::InitialExec) {
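// Initial-exec: the offset from TPIDR_EL0 lives in the GOT, so this is
// roughly (assembly sketch):
//   adrp x0, :gottprel:var
//   ldr  x0, [x0, #:gottprel_lo12:var]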
TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
AArch64II::MO_GOTTPREL),
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
AArch64II::MO_GOTTPREL_LO12),
DAG.getConstant(8, MVT::i32));
TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
TPOff);
} else if (Model == TLSModel::LocalExec) {
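// Local-exec: the offset is a link-time constant built directly, roughly
// (assembly sketch):
//   movz x0, #:tprel_g1:var
//   movk x0, #:tprel_g0_nc:var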
SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
AArch64II::MO_TPREL_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
AArch64II::MO_TPREL_G0_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
DAG.getTargetConstant(1, MVT::i32)), 0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
TPOff, LoVar,
DAG.getTargetConstant(0, MVT::i32)), 0);
} else if (Model == TLSModel::GeneralDynamic) {
// Accesses used in this sequence go via the TLS descriptor which lives in
// the GOT. Prepare an address we can use to handle this.
SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
AArch64II::MO_TLSDESC);
SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
AArch64II::MO_TLSDESC_LO12);
SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
HiDesc, LoDesc,
DAG.getConstant(8, MVT::i32));
SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
// the beginning of the module's TLS region, followed by a DTPREL offset
// calculation.
// These accesses will need deduplicating if there's more than one.
AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
.getInfo<AArch64MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// Get the location of _TLS_MODULE_BASE_:
SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLSDESC);
SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLSDESC_LO12);
SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
HiDesc, LoDesc,
DAG.getConstant(8, MVT::i32));
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
// Get the variable's offset from _TLS_MODULE_BASE_
SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
AArch64II::MO_DTPREL_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
AArch64II::MO_DTPREL_G0_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
DAG.getTargetConstant(0, MVT::i32)), 0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
TPOff, LoVar,
DAG.getTargetConstant(0, MVT::i32)), 0);
} else
llvm_unreachable("Unsupported TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
SDValue
AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
bool IsSigned) const {
if (Op.getValueType() != MVT::f128) {
// Legal for everything except f128.
return Op;
}
RTLIB::Libcall LC;
if (IsSigned)
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
return LowerF128ToCall(Op, DAG, LC);
}
SDValue
AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDLoc dl(JT);
EVT PtrVT = getPointerTy();
// When compiling PIC, jump tables get put in the code section so a static
// relocation-style is acceptable for both cases.
switch (getTargetMachine().getCodeModel()) {
case CodeModel::Small:
return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
AArch64II::MO_LO12),
DAG.getConstant(1, MVT::i32));
case CodeModel::Large:
return DAG.getNode(
AArch64ISD::WrapperLarge, dl, PtrVT,
DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
default:
llvm_unreachable("Only small and large code models supported now");
}
}
// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
SDValue
AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue IfTrue = Op.getOperand(2);
SDValue IfFalse = Op.getOperand(3);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
if (LHS.getValueType() == MVT::f128) {
// f128 comparisons are lowered to libcalls, but slot in nicely here
// afterwards.
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (RHS.getNode() == 0) {
RHS = DAG.getConstant(0, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType().isInteger()) {
SDValue A64cc;
// Integers are handled in a separate function because the combinations of
// immediates and tests can get hairy and we may want to fiddle things.
SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
CmpOp, IfTrue, IfFalse, A64cc);
}
// Note that some LLVM floating-point CondCodes can't be lowered to a single
// conditional branch, hence FPCCToA64CC can set a second test, where either
// passing is sufficient.
A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
CondCode = FPCCToA64CC(CC, Alternative);
SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
DAG.getCondCode(CC));
SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
Op.getValueType(),
SetCC, IfTrue, IfFalse, A64cc);
if (Alternative != A64CC::Invalid) {
A64cc = DAG.getConstant(Alternative, MVT::i32);
A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
SetCC, IfTrue, A64SELECT_CC, A64cc);
}
return A64SELECT_CC;
}
// (SELECT testbit, iftrue, iffalse)
SDValue
AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue TheBit = Op.getOperand(0);
SDValue IfTrue = Op.getOperand(1);
SDValue IfFalse = Op.getOperand(2);
// AArch64 BooleanContents is the default UndefinedBooleanContent, which means
// that as the consumer we are responsible for ignoring rubbish in higher
// bits.
TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
DAG.getConstant(1, MVT::i32));
SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
DAG.getConstant(0, TheBit.getValueType()),
DAG.getCondCode(ISD::SETNE));
return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
A64CMP, IfTrue, IfFalse,
DAG.getConstant(A64CC::NE, MVT::i32));
}
static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
EVT VT = Op.getValueType();
bool Invert = false;
SDValue Op0, Op1;
unsigned Opcode;
if (LHS.getValueType().isInteger()) {
// Attempt to use Vector Integer Compare Mask Test instruction.
// TST = icmp ne (and (op0, op1), zero).
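// e.g. (setcc ne (and va, vb), zeroes) can select to "cmtst vd, va, vb"
// (illustrative; the node built below is NEON_TST).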
if (CC == ISD::SETNE) {
if (((LHS.getOpcode() == ISD::AND) &&
ISD::isBuildVectorAllZeros(RHS.getNode())) ||
((RHS.getOpcode() == ISD::AND) &&
ISD::isBuildVectorAllZeros(LHS.getNode()))) {
SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
}
}
// Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
// Note: Compare against Zero does not support unsigned predicates.
if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
ISD::isBuildVectorAllZeros(LHS.getNode())) &&
!isUnsignedIntSetCC(CC)) {
// If LHS is the zero value, swap operands and CondCode.
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
CC = getSetCCSwappedOperands(CC);
Op0 = RHS;
} else
Op0 = LHS;
// Ensure valid CondCode for Compare Mask against Zero instruction:
// EQ, GE, GT, LE, LT.
if (ISD::SETNE == CC) {
Invert = true;
CC = ISD::SETEQ;
}
// Using constant type to differentiate integer and FP compares with zero.
Op1 = DAG.getConstant(0, MVT::i32);
Opcode = AArch64ISD::NEON_CMPZ;
} else {
// Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
// Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
bool Swap = false;
switch (CC) {
default:
llvm_unreachable("Illegal integer comparison.");
case ISD::SETEQ:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE:
break;
case ISD::SETNE:
Invert = true;
CC = ISD::SETEQ;
break;
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETLT:
case ISD::SETLE:
Swap = true;
CC = getSetCCSwappedOperands(CC);
}
if (Swap)
std::swap(LHS, RHS);
Opcode = AArch64ISD::NEON_CMP;
Op0 = LHS;
Op1 = RHS;
}
// Generate Compare Mask instr or Compare Mask against Zero instr.
SDValue NeonCmp =
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
if (Invert)
NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
return NeonCmp;
}
// Now handle Floating Point cases.
// Attempt to use Vector Floating Point Compare Mask against Zero instruction.
if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
ISD::isBuildVectorAllZeros(LHS.getNode())) {
// If LHS is the zero value, swap operands and CondCode.
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
CC = getSetCCSwappedOperands(CC);
Op0 = RHS;
} else
Op0 = LHS;
// Using constant type to differentiate integer and FP compares with zero.
Op1 = DAG.getConstantFP(0, MVT::f32);
Opcode = AArch64ISD::NEON_CMPZ;
} else {
// Attempt to use Vector Floating Point Compare Mask instruction.
Op0 = LHS;
Op1 = RHS;
Opcode = AArch64ISD::NEON_CMP;
}
SDValue NeonCmpAlt;
// Some register compares have to be implemented with swapped CC and operands,
// e.g.: OLT implemented as OGT with swapped operands.
bool SwapIfRegArgs = false;
// Ensure valid CondCode for FP Compare Mask against Zero instruction:
// EQ, GE, GT, LE, LT.
// And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
switch (CC) {
default:
llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE:
Invert = true; // Fallthrough
case ISD::SETOEQ:
case ISD::SETEQ:
CC = ISD::SETEQ;
break;
case ISD::SETOLT:
case ISD::SETLT:
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
case ISD::SETOGT:
case ISD::SETGT:
CC = ISD::SETGT;
break;
case ISD::SETOLE:
case ISD::SETLE:
CC = ISD::SETLE;
SwapIfRegArgs = true;
break;
case ISD::SETOGE:
case ISD::SETGE:
CC = ISD::SETGE;
break;
case ISD::SETUGE:
Invert = true;
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
case ISD::SETULE:
Invert = true;
CC = ISD::SETGT;
break;
case ISD::SETUGT:
Invert = true;
CC = ISD::SETLE;
SwapIfRegArgs = true;
break;
case ISD::SETULT:
Invert = true;
CC = ISD::SETGE;
break;
case ISD::SETUEQ:
Invert = true; // Fallthrough
case ISD::SETONE:
// Expand this to (OGT | OLT).
NeonCmpAlt =
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
case ISD::SETUO:
Invert = true; // Fallthrough
case ISD::SETO:
// Expand this to (OGE | OLT).
NeonCmpAlt =
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
}
if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
CC = getSetCCSwappedOperands(CC);
std::swap(Op0, Op1);
}
// Generate FP Compare Mask instr or FP Compare Mask against Zero instr
SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
if (NeonCmpAlt.getNode())
NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
if (Invert)
NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
return NeonCmp;
}
// (SETCC lhs, rhs, condcode)
SDValue
AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorSETCC(Op, DAG);
if (LHS.getValueType() == MVT::f128) {
// f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
// for the rest of the function (some i32 or i64 values).
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
// If softenSetCCOperands returned a scalar, use it.
if (RHS.getNode() == 0) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return LHS;
}
}
if (LHS.getValueType().isInteger()) {
SDValue A64cc;
// Integers are handled in a separate function because the combinations of
// immediates and tests can get hairy and we may want to fiddle things.
SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
A64cc);
}
// Note that some LLVM floating-point CondCodes can't be lowered to a single
// conditional branch, hence FPCCToA64CC can set a second test, where either
// passing is sufficient.
A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
CondCode = FPCCToA64CC(CC, Alternative);
SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
DAG.getCondCode(CC));
SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
CmpOp, DAG.getConstant(1, VT),
DAG.getConstant(0, VT), A64cc);
if (Alternative != A64CC::Invalid) {
A64cc = DAG.getConstant(Alternative, MVT::i32);
A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
}
return A64SELECT_CC;
}
SDValue
AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
// We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
// rather than just 8.
return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(32, MVT::i32), 8, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue
AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
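// In C terms, the layout being filled in below is, in effect (sketch):
//   struct va_list {
//     void *__stack;   // offset 0
//     void *__gr_top;  // offset 8
//     void *__vr_top;  // offset 16
//     int   __gr_offs; // offset 24
//     int   __vr_offs; // offset 28
//   };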
MachineFunction &MF = DAG.getMachineFunction();
AArch64MachineFunctionInfo *FuncInfo
= MF.getInfo<AArch64MachineFunctionInfo>();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue VAList = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
getPointerTy());
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), false, false, 0));
// void *__gr_top at offset 8
int GPRSize = FuncInfo->getVariadicGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(8, getPointerTy()));
GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
DAG.getConstant(GPRSize, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8),
false, false, 0));
}
// void *__vr_top at offset 16
int FPRSize = FuncInfo->getVariadicFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(16, getPointerTy()));
VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
DAG.getConstant(FPRSize, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16),
false, false, 0));
}
// int __gr_offs at offset 24
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(24, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24),
false, false, 0));
// int __vr_offs at offset 28
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(28, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28),
false, false, 0));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
MemOps.size());
}
SDValue
AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
}
return SDValue();
}
/// Check if the specified splat value corresponds to a valid vector constant
/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
/// so, return the encoded 8-bit immediate and the OpCmode instruction fields
/// values.
static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
bool is128Bits, NeonModImmType type, EVT &VT,
unsigned &Imm, unsigned &OpCmode) {
switch (SplatBitSize) {
default:
llvm_unreachable("unexpected size for isNeonModifiedImm");
case 8: {
if (type != Neon_Mov_Imm)
return false;
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
// Neon movi per byte: Op=0, Cmode=1110.
OpCmode = 0xe;
Imm = SplatBits;
VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
break;
}
case 16: {
// Neon move inst per halfword
VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x00nn is 0x00nn LSL 0
// movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
// bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
// Op=x, Cmode=100y
Imm = SplatBits;
OpCmode = 0x8;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0xnn00 is 0x00nn LSL 8
// movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
// bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
// Op=x, Cmode=101x
Imm = SplatBits >> 8;
OpCmode = 0xa;
break;
}
// can't handle any other
return false;
}
case 32: {
// First the LSL variants (MSL is unusable by some of the instructions
// interested in these immediates).
// Neon move instr per word, shift zeros
VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x000000nn is 0x000000nn LSL 0
// movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
// bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
// Op=x, Cmode=000x
Imm = SplatBits;
OpCmode = 0;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0x0000nn00 is 0x000000nn LSL 8
// movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
// bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
// Op=x, Cmode=001x
Imm = SplatBits >> 8;
OpCmode = 0x2;
break;
}
if ((SplatBits & ~0xff0000) == 0) {
// Value = 0x00nn0000 is 0x000000nn LSL 16
// movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
// bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
// Op=x, Cmode=010x
Imm = SplatBits >> 16;
OpCmode = 0x4;
break;
}
if ((SplatBits & ~0xff000000) == 0) {
// Value = 0xnn000000 is 0x000000nn LSL 24
// movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
// bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
// Op=x, Cmode=011x
Imm = SplatBits >> 24;
OpCmode = 0x6;
break;
}
// Now the MSL immediates.
// Neon move instr per word, shift ones
if ((SplatBits & ~0xffff) == 0 &&
((SplatBits | SplatUndef) & 0xff) == 0xff) {
// Value = 0x0000nnff is 0x000000nn MSL 8
// movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
// Op=x, Cmode=1100
Imm = SplatBits >> 8;
OpCmode = 0xc;
break;
}
if ((SplatBits & ~0xffffff) == 0 &&
((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
// Value = 0x00nnffff is 0x000000nn MSL 16
// movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
// Op=x, Cmode=1101
Imm = SplatBits >> 16;
OpCmode = 0xd;
break;
}
// can't handle any other
return false;
}
case 64: {
if (type != Neon_Mov_Imm)
return false;
// Neon move instr bytemask, where each byte is either 0x00 or 0xff.
// movi Op=1, Cmode=1110.
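// e.g. a splat of 0x00ff00ff00ff00ff yields Imm == 0b01010101 (0x55),
// one bit per 0xff byte, starting from the least significant byte.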
OpCmode = 0x1e;
uint64_t BitMask = 0xff;
uint64_t Val = 0;
unsigned ImmMask = 1;
Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
Imm |= ImmMask;
} else if ((SplatBits & BitMask) != 0) {
return false;
}
BitMask <<= 8;
ImmMask <<= 1;
}
SplatBits = Val;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
}
return true;
}
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// We're looking for an AND/SRL pair which forms a UBFX.
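// e.g. (and (srl x, #3), 0x1f) becomes (UBFX x, #3, #7): a field of width 5
// starting at bit 3 (the node's operands are LSB and LSB + Width - 1).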
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
if (!isa<ConstantSDNode>(N->getOperand(1)))
return SDValue();
uint64_t TruncMask = N->getConstantOperandVal(1);
if (!isMask_64(TruncMask))
return SDValue();
uint64_t Width = CountPopulation_64(TruncMask);
SDValue Shift = N->getOperand(0);
if (Shift.getOpcode() != ISD::SRL)
return SDValue();
if (!isa<ConstantSDNode>(Shift->getOperand(1)))
return SDValue();
uint64_t LSB = Shift->getConstantOperandVal(1);
if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
return SDValue();
return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
DAG.getConstant(LSB, MVT::i64),
DAG.getConstant(LSB + Width - 1, MVT::i64));
}
/// For a true bitfield insert, the bits getting into that contiguous mask
/// should come from the low part of an existing value: they must be formed from
/// a compatible SHL operation (unless they're already low). This function
/// checks that condition and returns the least-significant bit that's
/// intended. If the operation is not a field preparation, -1 is returned.
static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
SDValue &MaskedVal, uint64_t Mask) {
if (!isShiftedMask_64(Mask))
return -1;
// Now we need to alter MaskedVal so that it is an appropriate input for a BFI
// instruction. BFI will do a left-shift by LSB before applying the mask we've
// spotted, so in general we should pre-emptively "undo" that by making sure
// the incoming bits have had a right-shift applied to them.
//
// This right shift, however, will combine with existing left/right shifts. In
// the simplest case of a completely straight bitfield operation, it will be
// expected to completely cancel out with an existing SHL. More complicated
// cases (e.g. bitfield to bitfield copy) may still need a real shift before
// the BFI.
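// For example (sketch): with Mask == 0x78 and MaskedVal == (shl val, #3),
// LSB is 3 and the existing SHL cancels the required right-shift exactly,
// so MaskedVal becomes plain "val".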
uint64_t LSB = countTrailingZeros(Mask);
int64_t ShiftRightRequired = LSB;
if (MaskedVal.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
MaskedVal = MaskedVal.getOperand(0);
} else if (MaskedVal.getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
MaskedVal = MaskedVal.getOperand(0);
}
if (ShiftRightRequired > 0)
MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
DAG.getConstant(ShiftRightRequired, MVT::i64));
else if (ShiftRightRequired < 0) {
// We could actually end up with a residual left shift, for example with
// "struc.bitfield = val << 1".
MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
DAG.getConstant(-ShiftRightRequired, MVT::i64));
}
return LSB;
}
/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
/// a mask and an extension. Returns true if a BFI was found and provides
/// information on its surroundings.
static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
bool &Extended) {
Extended = false;
if (N.getOpcode() == ISD::ZERO_EXTEND) {
Extended = true;
N = N.getOperand(0);
}
if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
Mask = N->getConstantOperandVal(1);
N = N.getOperand(0);
} else {
// Mask is the whole width.
Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
}
if (N.getOpcode() == AArch64ISD::BFI) {
BFI = N;
return true;
}
return false;
}
/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
/// can often be further combined with a larger mask. Ultimately, we want mask
/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
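/// For example (a sketch): on i32, (or (and x, 0xffffff00), (and y, 0xff))
/// can become (BFI y, (srl x, #8), #8, #24), and since the two masks cover
/// the whole register no residual AND is needed.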
static SDValue tryCombineToBFI(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
// We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
// abandon the effort.
SDValue LHS = N->getOperand(0);
if (LHS.getOpcode() != ISD::AND)
return SDValue();
uint64_t LHSMask;
if (isa<ConstantSDNode>(LHS.getOperand(1)))
LHSMask = LHS->getConstantOperandVal(1);
else
return SDValue();
// We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
// is or abandon the effort.
SDValue RHS = N->getOperand(1);
if (RHS.getOpcode() != ISD::AND)
return SDValue();
uint64_t RHSMask;
if (isa<ConstantSDNode>(RHS.getOperand(1)))
RHSMask = RHS->getConstantOperandVal(1);
else
return SDValue();
// Can't do anything if the masks are incompatible.
if (LHSMask & RHSMask)
return SDValue();
// Now we need one of the masks to be a contiguous field. Without loss of
// generality that should be the RHS one.
SDValue Bitfield = LHS.getOperand(0);
if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
// We know that LHS is a candidate new value, and RHS isn't already a better
// one.
std::swap(LHS, RHS);
std::swap(LHSMask, RHSMask);
}
// We've done our best to put the right operands in the right places, all we
// can do now is check whether a BFI exists.
Bitfield = RHS.getOperand(0);
int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
if (LSB == -1)
return SDValue();
uint32_t Width = CountPopulation_64(RHSMask);
assert(Width && "Expected non-zero bitfield width");
SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
LHS.getOperand(0), Bitfield,
DAG.getConstant(LSB, MVT::i64),
DAG.getConstant(Width, MVT::i64));
// Mask is trivial
if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
return BFI;
return DAG.getNode(ISD::AND, DL, VT, BFI,
DAG.getConstant(LHSMask | RHSMask, VT));
}
/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
/// original input. This is surprisingly common because SROA splits things up
/// into i8 chunks, so the originally detected MaskedBFI may actually only act
/// on the low (say) byte of a word. This is then orred into the rest of the
/// word afterwards.
///
/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
///
/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
/// involved.
static SDValue tryCombineToLargerBFI(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// First job is to hunt for a MaskedBFI on either the left or right. Swap
// operands if it's actually on the right.
SDValue BFI;
SDValue PossExtraMask;
uint64_t ExistingMask = 0;
bool Extended = false;
if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
PossExtraMask = N->getOperand(1);
else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
PossExtraMask = N->getOperand(0);
else
return SDValue();
// We can only combine a BFI with another compatible mask.
if (PossExtraMask.getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
return SDValue();
uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
// Masks must be compatible.
if (ExtraMask & ExistingMask)
return SDValue();
SDValue OldBFIVal = BFI.getOperand(0);
SDValue NewBFIVal = BFI.getOperand(1);
if (Extended) {
// We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
// 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
// need to be made compatible.
assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
&& "Invalid types for BFI");
OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
}
// We need the MaskedBFI to be combined with a mask of the *same* value.
if (PossExtraMask.getOperand(0) != OldBFIVal)
return SDValue();
BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
OldBFIVal, NewBFIVal,
BFI.getOperand(2), BFI.getOperand(3));
// If the masking is trivial, we don't need to create it.
if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
return BFI;
return DAG.getNode(ISD::AND, DL, VT, BFI,
DAG.getConstant(ExtraMask | ExistingMask, VT));
}
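// An illustrative check with hypothetical mask values: a MaskedBFI that wrote
// the low byte (ExistingMask == 0xFF) ORed with the untouched remainder of a
// 32-bit word (ExtraMask == 0xFFFFFF00) passes both tests above, and since
// the union covers every bit the trailing AND is elided.
static_assert((0xFFu & 0xFFFFFF00u) == 0u, "masks are disjoint");
static_assert((0xFFu | 0xFFFFFF00u) == 0xFFFFFFFFu, "combined mask is trivial");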
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
bool &FromHi) {
if (N.getOpcode() == ISD::SHL)
FromHi = false;
else if (N.getOpcode() == ISD::SRL)
FromHi = true;
else
return false;
if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;
ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);
return true;
}
/// An EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
/// EXTR. This can't quite be done in TableGen because the two immediates aren't
/// independent.
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
// If they're both trying to come from the high part of the register, they're
// not really an EXTR.
if (LHSFromHi == RHSFromHi)
return SDValue();
if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
return SDValue();
if (LHSFromHi) {
std::swap(LHS, RHS);
std::swap(ShiftLHS, ShiftRHS);
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT,
LHS, RHS,
DAG.getConstant(ShiftRHS, MVT::i64));
}
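// A minimal standalone sketch of the EXTR semantics matched above, assuming
// 64-bit operands: (or (shl Hi, #64-LSB), (srl Lo, #LSB)) extracts a 64-bit
// window from the 128-bit concatenation Hi:Lo starting at bit LSB of Lo.
// extr64 is a hypothetical helper.
#include <cstdint>
static inline uint64_t extr64(uint64_t Hi, uint64_t Lo, unsigned LSB) {
  // Shifting by 64 is undefined in C++, so treat LSB == 0 separately.
  return LSB == 0 ? Lo : (Lo >> LSB) | (Hi << (64 - LSB));
}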
/// Target-specific DAG combine transforms for ISD::OR.
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// Attempt to recognise bitfield-insert operations.
SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
if (Res.getNode())
return Res;
// Attempt to combine an existing MaskedBFI operation into one with a larger
// mask.
Res = tryCombineToLargerBFI(N, DCI, Subtarget);
if (Res.getNode())
return Res;
Res = tryCombineToEXTR(N, DCI);
if (Res.getNode())
return Res;
if (!Subtarget->hasNEON())
return SDValue();
// Attempt to use vector immediate-form BSL
// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
APInt SplatBits0;
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
!HasAnyUndefs) {
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
APInt SplatBits1;
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
!HasAnyUndefs && SplatBits0 == ~SplatBits1) {
// Canonicalize the vector type to make instruction selection simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
N0->getOperand(1), N0->getOperand(0),
N1->getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, VT, Result);
}
}
}
return SDValue();
}
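// A minimal standalone sketch of the bitwise-select identity used above:
// where a bit of A is set the result takes B's bit, otherwise C's bit.
// bsl64 is a hypothetical helper, assuming 64-bit operands.
#include <cstdint>
static inline uint64_t bsl64(uint64_t A, uint64_t B, uint64_t C) {
  return (B & A) | (C & ~A); // (or (and B, A), (and C, ~A))
}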
/// Target-specific DAG combine transforms for ISD::SRA.
static SDValue PerformSRACombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// We're looking for an SRA/SHL pair that forms an SBFX.
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
if (!isa<ConstantSDNode>(N->getOperand(1)))
return SDValue();
uint64_t ExtraSignBits = N->getConstantOperandVal(1);
SDValue Shift = N->getOperand(0);
if (Shift.getOpcode() != ISD::SHL)
return SDValue();
if (!isa<ConstantSDNode>(Shift->getOperand(1)))
return SDValue();
uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
return SDValue();
return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
DAG.getConstant(LSB, MVT::i64),
DAG.getConstant(LSB + Width - 1, MVT::i64));
}
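// A minimal standalone sketch of the SBFX formed above, assuming 64-bit
// operands and LSB + Width <= 64: sign-extend the Width-bit field starting
// at bit LSB. sbfx64 is a hypothetical helper; note the node itself carries
// the encoded immediates (LSB, LSB + Width - 1).
#include <cstdint>
static inline int64_t sbfx64(uint64_t Src, unsigned LSB, unsigned Width) {
  // Move the field's top bit to bit 63, then arithmetic-shift it back down.
  return (int64_t)(Src << (64 - LSB - Width)) >> (64 - Width);
}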
/// Check if this is a valid build_vector for the immediate operand of
/// a vector shift operation, where all the elements of the build_vector
/// must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// Check if this is a valid build_vector for the immediate operand of
/// a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits
static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && Cnt < ElementBits);
}
/// Check if this is a valid build_vector for the immediate operand of a
/// vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits
static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= ElementBits);
}
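// An illustrative worked example of the two ranges above, assuming 32-bit
// elements: 31 is the largest encodable left-shift immediate, while right
// shifts may go up to the element size itself.
static_assert(0 <= 31 && 31 < 32, "valid left-shift immediate for i32 lanes");
static_assert(1 <= 32 && 32 <= 32, "valid right-shift immediate for i32 lanes");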
/// Checks for immediate versions of vector shifts and lowers them.
static SDValue PerformShiftCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *ST) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
return PerformSRACombine(N, DCI);
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;
switch (N->getOpcode()) {
default:
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
SDValue RHS =
DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
DAG.getConstant(Cnt, MVT::i32));
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
}
break;
case ISD::SRA:
case ISD::SRL:
if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
SDValue RHS =
DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
DAG.getConstant(Cnt, MVT::i32));
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
}
break;
}
return SDValue();
}
/// AArch64-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
// Don't do anything for most intrinsics.
break;
case Intrinsic::arm_neon_vqshifts:
case Intrinsic::arm_neon_vqshiftu:
EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
break;
unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
? AArch64ISD::NEON_QSHLs
: AArch64ISD::NEON_QSHLu;
return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
}
return SDValue();
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
continue;
// Find the new opcode for the updating load/store.
bool isLoad = true;
bool isLaneOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
if (isIntrinsic) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD;
NumVecs = 1; break;
case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD;
NumVecs = 2; break;
case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD;
NumVecs = 3; break;
case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD;
NumVecs = 4; break;
case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD;
NumVecs = 1; isLoad = false; break;
case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD;
NumVecs = 2; isLoad = false; break;
case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD;
NumVecs = 3; isLoad = false; break;
case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD;
NumVecs = 4; isLoad = false; break;
case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD;
NumVecs = 2; isLoad = false; break;
case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD;
NumVecs = 3; isLoad = false; break;
case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD;
NumVecs = 4; isLoad = false; break;
case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD;
NumVecs = 2; isLaneOp = true; break;
case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD;
NumVecs = 3; isLaneOp = true; break;
case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD;
NumVecs = 4; isLaneOp = true; break;
case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD;
NumVecs = 2; isLoad = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD;
NumVecs = 3; isLoad = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD;
NumVecs = 4; isLoad = false; isLaneOp = true; break;
}
} else {
isLaneOp = true;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode for Neon base update");
case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD;
NumVecs = 2; break;
case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD;
NumVecs = 3; break;
case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD;
NumVecs = 4; break;
}
}
// Find the size of memory referenced by the load/store.
EVT VecTy;
if (isLoad)
VecTy = N->getValueType(0);
else
VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
if (IncVal != NumBytes)
continue;
Inc = DAG.getTargetConstant(IncVal, MVT::i32);
}
// Create the new updating load/store node.
EVT Tys[6];
unsigned NumResultVecs = (isLoad ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
Tys[n++] = MVT::i64;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2);
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
Ops.push_back(N->getOperand(i));
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
Ops.data(), Ops.size(),
MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
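// A minimal standalone sketch of the increment check above; neonMemBytes is
// a hypothetical helper mirroring the NumBytes computation.
static inline unsigned neonMemBytes(unsigned NumVecs, unsigned VecBits,
                                    unsigned NumElts, bool IsLaneOp) {
  unsigned Bytes = NumVecs * VecBits / 8;    // whole vectors loaded/stored
  return IsLaneOp ? Bytes / NumElts : Bytes; // lane ops touch one lane each
}
// e.g. a vld2 of two v4i32 vectors folds an increment of
// neonMemBytes(2, 128, 4, false) == 32 bytes, while vld2lane needs
// neonMemBytes(2, 128, 4, true) == 8.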
/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
/// If so, combine them to a vldN-dup operation and return the combined node.
static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
// Check if the VDUPLANE operand is a vldN-lane intrinsic.
SDNode *VLD = N->getOperand(0).getNode();
if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
return SDValue();
unsigned NumVecs = 0;
unsigned NewOpc = 0;
unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
if (IntNo == Intrinsic::arm_neon_vld2lane) {
NumVecs = 2;
NewOpc = AArch64ISD::NEON_LD2DUP;
} else if (IntNo == Intrinsic::arm_neon_vld3lane) {
NumVecs = 3;
NewOpc = AArch64ISD::NEON_LD3DUP;
} else if (IntNo == Intrinsic::arm_neon_vld4lane) {
NumVecs = 4;
NewOpc = AArch64ISD::NEON_LD4DUP;
} else {
return SDValue();
}
// First check that all the vldN-lane uses are VDUPLANEs and that the lane
// numbers match the load.
unsigned VLDLaneNo =
cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
// Ignore uses of the chain result.
if (UI.getUse().getResNo() == NumVecs)
continue;
SDNode *User = *UI;
if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
return SDValue();
}
// Create the vldN-dup node.
EVT Tys[5];
unsigned n;
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1);
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2,
VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
// Update the uses.
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
unsigned ResNo = UI.getUse().getResNo();
// Ignore uses of the chain result.
if (ResNo == NumVecs)
continue;
SDNode *User = *UI;
DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
}
// Now the vldN-lane intrinsic is dead except for its chain result.
// Update uses of the chain.
std::vector<SDValue> VLDDupResults;
for (unsigned n = 0; n < NumVecs; ++n)
VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
DCI.CombineTo(VLD, VLDDupResults);
return SDValue(N, 0);
}
SDValue
AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
case ISD::AND: return PerformANDCombine(N, DCI);
case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
return PerformShiftCombine(N, DCI, getSubtarget());
case ISD::INTRINSIC_WO_CHAIN:
return PerformIntrinsicCombine(N, DCI.DAG);
case AArch64ISD::NEON_VDUPLANE:
return CombineVLDDUP(N, DCI);
case AArch64ISD::NEON_LD2DUP:
case AArch64ISD::NEON_LD3DUP:
case AArch64ISD::NEON_LD4DUP:
return CombineBaseUpdate(N, DCI);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::aarch64_neon_vld1x2:
case Intrinsic::aarch64_neon_vld1x3:
case Intrinsic::aarch64_neon_vld1x4:
case Intrinsic::aarch64_neon_vst1x2:
case Intrinsic::aarch64_neon_vst1x3:
case Intrinsic::aarch64_neon_vst1x4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
return CombineBaseUpdate(N, DCI);
default:
break;
}
}
return SDValue();
}
bool
AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
case MVT::f32:
case MVT::f64:
return true;
case MVT::f128:
return false;
default:
break;
}
return false;
}
// Check whether a BUILD_VECTOR can be represented as a VECTOR_SHUFFLE. If so,
// lower it by calling LowerVECTOR_SHUFFLE.
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
SDValue &Res) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned V0NumElts = 0;
int Mask[16];
SDValue V0, V1;
// Check that all elements are extracted from at most two vectors.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
if (V0.getNode() == 0) {
V0 = Elt.getOperand(0);
V0NumElts = V0.getValueType().getVectorNumElements();
}
if (Elt.getOperand(0) == V0) {
Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
continue;
} else if (V1.getNode() == 0) {
V1 = Elt.getOperand(0);
}
if (Elt.getOperand(0) == V1) {
unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
Mask[i] = (Lane + V0NumElts);
continue;
} else {
return false;
}
}
if (!V1.getNode() && V0NumElts == NumElts * 2) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(NumElts, MVT::i64));
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(0, MVT::i64));
V0NumElts = V0.getValueType().getVectorNumElements();
}
if (V1.getNode() && NumElts == V0NumElts &&
V0NumElts == V1.getValueType().getVectorNumElements()) {
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
return true;
} else
return false;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue
AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc DL(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
bool UseNeonMov = VT.getSizeInBits() >= 64;
// Note we favor lowering MOVI over MVNI.
// This has implications for the definition of patterns in TableGen to select
// BIC immediate instructions but not ORR immediate instructions.
// If this lowering order is changed, TableGen patterns for BIC immediate and
// ORR immediate instructions have to be updated.
if (UseNeonMov &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
// First attempt to use vector immediate-form MOVI
EVT NeonMovVT;
unsigned Imm = 0;
unsigned OpCmode = 0;
if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
SplatBitSize, DAG, VT.is128BitVector(),
Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
if (ImmVal.getNode() && OpCmodeVal.getNode()) {
SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
ImmVal, OpCmodeVal);
return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
}
}
// Then attempt to use vector immediate-form MVNI
uint64_t NegatedImm = (~SplatBits).getZExtValue();
if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
Imm, OpCmode)) {
SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
if (ImmVal.getNode() && OpCmodeVal.getNode()) {
SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
ImmVal, OpCmodeVal);
return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
}
}
// Attempt to use vector immediate-form FMOV
if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
(VT == MVT::v2f64 && SplatBitSize == 64)) {
APFloat RealVal(
SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
SplatBits);
uint32_t ImmVal;
if (A64Imms::isFPImm(RealVal, ImmVal)) {
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
}
}
}
}
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool hasDominantValue = false;
bool isConstant = true;
// Map of the number of times a particular SDValue appears in the
// element list.
DenseMap<SDValue, unsigned> ValueCounts;
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
if (i > 0)
isOnlyLowElement = false;
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
// Is this value dominant? (takes up more than half of the lanes)
if (++Count > (NumElts / 2)) {
hasDominantValue = true;
Value = V;
}
}
if (ValueCounts.size() != 1)
usesOnlyOneValue = false;
if (!Value.getNode() && ValueCounts.size() > 0)
Value = ValueCounts.begin()->first;
if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
// Loads are lowered better with insert_vector_elt, so keep going if we hit
// that case.
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (hasDominantValue && EltSize <= 64) {
// Use VDUP for non-constant splats.
if (!isConstant) {
SDValue N;
// If we are DUPing a value that comes directly from a vector, we could
// just use DUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the DUP from lane instructions only have
// constant-index forms.
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(Value->getOperand(1))) {
N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
Value->getOperand(0), Value->getOperand(1));
} else
N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
if (!usesOnlyOneValue) {
// The dominant value was splatted as 'N', but we now have to insert
// all differing elements.
for (unsigned I = 0; I < NumElts; ++I) {
if (Op.getOperand(I) == Value)
continue;
SmallVector<SDValue, 3> Ops;
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, MVT::i64));
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
}
}
return N;
}
if (usesOnlyOneValue && isConstant) {
return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
}
}
// If all elements are constants and the case above didn't get hit, fall back
// to the default expansion, which will generate a load from the constant
// pool.
if (isConstant)
return SDValue();
// Try to lower this as a vector shuffle.
SDValue Shuf;
if (isKnownShuffleVector(Op, DAG, Shuf))
return Shuf;
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse: for a vector with one or two non-undef values, scalar_to_vector
// of the elements followed by a shuffle (provided the shuffle is valid for
// the target); for everything else, element-by-element materialization on
// the stack followed by a load.
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
}
return Vec;
}
return SDValue();
}
/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
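// An illustrative worked example: REV32 on v8i16 reverses the 16-bit
// elements within each 32-bit block (BlockElts == 32/16 == 2), i.e. it
// matches the shuffle mask below. The array name is purely for illustration.
static const int REV32MaskV8I16[8] = {1, 0, 3, 2, 5, 4, 7, 6};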
// isPermuteMask - Check whether the vector shuffle matches a UZP, ZIP or
// TRN instruction.
static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts < 4)
return 0;
bool ismatch = true;
// Check UZP1
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i * 2) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_UZP1;
// Check UZP2
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i * 2 + 1) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_UZP2;
// Check ZIP1
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_ZIP1;
// Check ZIP2
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_ZIP2;
// Check TRN1
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_TRN1;
// Check TRN2
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_TRN2;
return 0;
}
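// Illustrative worked examples of the masks recognised above for
// NumElts == 4 (indices 0-3 select from the first vector, 4-7 from the
// second); the array names are purely for illustration.
static const int UZP1Mask[4] = {0, 2, 4, 6}; // M[i] == 2*i
static const int ZIP1Mask[4] = {0, 4, 1, 5}; // M[i] == i/2 + 4*(i%2)
static const int TRN1Mask[4] = {0, 4, 2, 6}; // M[i] == i + 3*(i%2)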
SDValue
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize > 64)
return SDValue();
if (isREVMask(ShuffleMask, VT, 64))
return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
if (isREVMask(ShuffleMask, VT, 32))
return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
unsigned ISDNo = isPermuteMask(ShuffleMask, VT);
if (ISDNo)
return DAG.getNode(ISDNo, dl, VT, V1, V2);
// If the elements of the shuffle mask are all the same constant, we can
// transform it into either NEON_VDUP or NEON_VDUPLANE.
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via a plain VDUP, if possible.
if (Lane == -1) Lane = 0;
// Test if V1 is a SCALAR_TO_VECTOR.
if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
}
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
if (V1.getOpcode() == ISD::BUILD_VECTOR) {
bool IsScalarToVector = true;
for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
i != (unsigned)Lane) {
IsScalarToVector = false;
break;
}
if (IsScalarToVector)
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
V1.getOperand(Lane));
}
// Test if V1 is an EXTRACT_SUBVECTOR.
if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
DAG.getConstant(Lane + ExtLane, MVT::i64));
}
// Test if V1 is a CONCAT_VECTORS.
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
V1.getOperand(1).getOpcode() == ISD::UNDEF) {
SDValue Op0 = V1.getOperand(0);
assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
"Invalid vector lane access");
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
DAG.getConstant(Lane, MVT::i64));
}
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i64));
}
int Length = ShuffleMask.size();
int V1EltNum = V1.getValueType().getVectorNumElements();
// If the number of V1 elements is the same as the number of shuffle mask
// elements and the mask values are sequential, we can transform this
// into NEON_VEXTRACT.
if (V1EltNum == Length) {
// Check if the shuffle mask is sequential.
bool IsSequential = true;
int CurMask = ShuffleMask[0];
for (int I = 0; I < Length; ++I) {
if (ShuffleMask[I] != CurMask) {
IsSequential = false;
break;
}
CurMask++;
}
if (IsSequential) {
assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
unsigned VecSize = EltSize * V1EltNum;
unsigned Index = (EltSize/8) * ShuffleMask[0];
if (VecSize == 64 || VecSize == 128)
return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
DAG.getConstant(Index, MVT::i64));
}
}
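// A minimal standalone sketch of the sequential-mask test above;
// isSequentialMask is a hypothetical helper. For v8i8, for example, the mask
// <3,4,5,6,7,8,9,10> is sequential from 3 and maps to
// NEON_VEXTRACT(V1, V2, #3), since Index == (8/8) * 3.
static inline bool isSequentialMask(const int *M, int Len) {
  for (int I = 1; I < Len; ++I)
    if (M[I] != M[0] + I)
      return false;
  return true;
}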
// For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
// insert by element from V2 into V1.
// If the mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is the better base
// to insert into, as it needs fewer inserts; so count the elements to be
// inserted for both V1 and V2, and pick whichever needs fewer.
// Collect the elements that need to be inserted and their indices.
SmallVector<int, 8> NV1Elt;
SmallVector<int, 8> N1Index;
SmallVector<int, 8> NV2Elt;
SmallVector<int, 8> N2Index;
for (int I = 0; I != Length; ++I) {
if (ShuffleMask[I] != I) {
NV1Elt.push_back(ShuffleMask[I]);
N1Index.push_back(I);
}
}
for (int I = 0; I != Length; ++I) {
if (ShuffleMask[I] != (I + V1EltNum)) {
NV2Elt.push_back(ShuffleMask[I]);
N2Index.push_back(I);
}
}
// Decide which vector to insert into. If all lanes mismatch, neither V1
// nor V2 is used as the base.
SDValue InsV = V1;
SmallVector<int, 8> InsMasks = NV1Elt;
SmallVector<int, 8> InsIndex = N1Index;
if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
if (NV1Elt.size() > NV2Elt.size()) {
InsV = V2;
InsMasks = NV2Elt;
InsIndex = N2Index;
}
} else {
InsV = DAG.getNode(ISD::UNDEF, dl, VT);
}
for (int I = 0, E = InsMasks.size(); I != E; ++I) {
SDValue ExtV = V1;
int Mask = InsMasks[I];
if (Mask >= V1EltNum) {
ExtV = V2;
Mask -= V1EltNum;
}
// Any value type smaller than i32 is illegal on AArch64, and this lowering
// function runs after the legalize pass, so we need to legalize the
// extracted value here.
EVT EltVT;
if (VT.getVectorElementType().isFloatingPoint())
EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
else
EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
if (Mask >= 0) {
ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
DAG.getConstant(Mask, MVT::i64));
InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
DAG.getConstant(InsIndex[I], MVT::i64));
}
}
return InsV;
}
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'w': // An FP/SIMD vector register
return C_RegisterClass;
case 'I': // Constant that can be used with an ADD instruction
case 'J': // Constant that can be used with a SUB instruction
case 'K': // Constant that can be used with a 32-bit logical instruction
case 'L': // Constant that can be used with a 64-bit logical instruction
case 'M': // Constant that can be used as a 32-bit MOV immediate
case 'N': // Constant that can be used as a 64-bit MOV immediate
case 'Y': // Floating point constant zero
case 'Z': // Integer constant zero
return C_Other;
case 'Q': // A memory reference with base register and no offset
return C_Memory;
case 'S': // A symbolic address
return C_Other;
}
}
// FIXME: Ump, Utf, Usa, Ush
// Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
// whatever they may be
// Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
// Usa: An absolute symbolic address
// Ush: The high part (bits 32:12) of a pc-relative symbolic address
assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
&& Constraint != "Ush" && "Unimplemented constraints");
return TargetLowering::getConstraintType(Constraint);
}
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
const char *Constraint) const {
llvm_unreachable("Constraint weight unimplemented");
}
void
AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
// Only length 1 constraints are C_Other.
if (Constraint.size() != 1) return;
// Only C_Other constraints get lowered like this. For us that means
// constants, so return early if there's no hope the constraint can be
// lowered.
switch(Constraint[0]) {
default: break;
case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'Z': {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
uint64_t CVal = C->getZExtValue();
uint32_t Bits;
switch (Constraint[0]) {
default:
// FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
// is a peculiarly useless SUB constraint.
llvm_unreachable("Unimplemented C_Other constraint");
case 'I':
if (CVal <= 0xfff)
break;
return;
case 'K':
if (A64Imms::isLogicalImm(32, CVal, Bits))
break;
return;
case 'L':
if (A64Imms::isLogicalImm(64, CVal, Bits))
break;
return;
case 'Z':
if (CVal == 0)
break;
return;
}
Result = DAG.getTargetConstant(CVal, Op.getValueType());
break;
}
case 'S': {
// An absolute symbolic address or label reference.
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0));
} else if (const BlockAddressSDNode *BA
= dyn_cast<BlockAddressSDNode>(Op)) {
Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
BA->getValueType(0));
} else if (const ExternalSymbolSDNode *ES
= dyn_cast<ExternalSymbolSDNode>(Op)) {
Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
ES->getValueType(0));
} else
return;
break;
}
case 'Y':
if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
if (CFP->isExactlyValue(0.0)) {
Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
break;
}
}
return;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
// It's an unknown constraint for us. Let generic code have a go.
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
std::pair<unsigned, const TargetRegisterClass*>
AArch64TargetLowering::getRegForInlineAsmConstraint(
const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT.getSizeInBits() <= 32)
return std::make_pair(0U, &AArch64::GPR32RegClass);
else if (VT == MVT::i64)
return std::make_pair(0U, &AArch64::GPR64RegClass);
break;
case 'w':
if (VT == MVT::f16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
else if (VT == MVT::f32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
else if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
}
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
/// The associated MachineMemOperands record the alignment specified
/// in the intrinsic calls.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::aarch64_neon_vld1x2:
case Intrinsic::aarch64_neon_vld1x3:
case Intrinsic::aarch64_neon_vld1x4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile loads with NEON intrinsics not supported
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::aarch64_neon_vst1x2:
case Intrinsic::aarch64_neon_vst1x3:
case Intrinsic::aarch64_neon_vst1x4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile stores with NEON intrinsics not supported
Info.readMem = false;
Info.writeMem = true;
return true;
}
default:
break;
}
return false;
}
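// An illustrative worked example of the conservative memVT computation
// above: a vld3 whose aggregate result is three v4i32 vectors (16 bytes
// each) covers 48 bytes, modelled as a vector of 48 / 8 == 6 i64 elements.
static_assert(3 * 16 / 8 == 6, "three 16-byte vectors -> v6i64");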
Index: head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td (revision 265924)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td (revision 265925)
@@ -1,5193 +1,5194 @@
//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the AArch64 scalar instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto","crypto">;
// Use fused MAC if more precision in FP computation is allowed.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast)">;
include "AArch64InstrFormats.td"
//===----------------------------------------------------------------------===//
// Target-specific ISD nodes and profiles
//===----------------------------------------------------------------------===//
def SDT_A64ret : SDTypeProfile<0, 0, []>;
def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
SDNPOptInGlue,
SDNPVariadic]>;
// (ins NZCV, Condition, Dest)
def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<2, 3>]>;
def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
// (outs NZCV), (ins LHS, RHS, Condition)
def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
SDTCisSameAs<1, 2>]>;
def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
// (outs GPR64), (ins)
def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
// A64 compares don't really care about the cond (they set all flags), so a
// simple binary operator is useful.
def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
(A64setcc node:$lhs, node:$rhs, cond)>;
// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
// and V flags can be set differently by this operation. It comes down to
// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are,
// everything is fine; if not, the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
// So, finally, the only LLVM-native comparisons that don't mention C and V are
// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
// absence of information about op2.
def equality_cond : PatLeaf<(cond), [{
return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
}]>;
def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
(A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
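// A minimal standalone sketch (in C++ rather than TableGen) of the
// carry-flag hazard described above; carryOut32 is a hypothetical helper for
// the carry out of a 32-bit addition.
#include <cstdint>
static inline bool carryOut32(uint32_t A, uint32_t B, bool CarryIn) {
  return (uint64_t)A + B + CarryIn > 0xFFFFFFFFu;
}
// For op1 == 1 and op2 == 0: CMP op1, #0 sets C from 1 + ~0 + 1, i.e.
// carryOut32(1, ~0u, true) == true, while CMN op1, #0 sets C from 1 + 0,
// i.e. carryOut32(1, 0u, false) == false, so unsigned-order predicates
// disagree even though Z and N agree; only SETEQ/SETNE are immune.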
// There are two layers of indirection here, driven by the following
// considerations.
// + TableGen does not know CodeModel or Reloc so that decision should be
// made for a variable/address at ISelLowering.
// + The output of ISelLowering should be selectable (hence the Wrapper,
// rather than a bare target opcode)
def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisSameAs<0, 4>,
SDTCisPtrTy<0>]>;
def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i32>,
SDTCisPtrTy<0>]>;
def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
[SDNPHasChain]>;
// (A64BFI LHS, RHS, LSB, Width)
def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i64>,
SDTCisVT<4, i64>]>;
def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
// (A64EXTR HiReg, LoReg, LSB)
def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i64>]>;
def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
// (A64[SU]BFX Field, ImmR, ImmS).
//
// Note that ImmR and ImmS are already encoded for the actual instructions. The
// more natural LSB and Width mix together to form ImmR and ImmS, something
// which TableGen can't handle.
def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
//===----------------------------------------------------------------------===//
// Call sequence pseudo-instructions
//===----------------------------------------------------------------------===//
def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
// destination but needs a relocation against a fixed symbol. As such it has two
// certain operands: the callee and the relocated variable.
//
// The TLS ABI only allows it to be selected to a BLR instruction (with
// appropriate relocation).
def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
[SDNPInGlue, SDNPOutGlue, SDNPHasChain,
SDNPVariadic]>;
def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// These pseudo-instructions have special semantics by virtue of being passed to
// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
// LowerCall to (in our case) tell the back-end about stack adjustments for
// arguments passed on the stack. Here we select those markers to
// pseudo-instructions which explicitly set the stack, and finally in the
// RegisterInfo we convert them to a true stack adjustment.
let Defs = [XSP], Uses = [XSP] in {
def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
[(AArch64callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
[(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
}
//===----------------------------------------------------------------------===//
// Atomic operation pseudo-instructions
//===----------------------------------------------------------------------===//
// These get selected from C++ code as a pretty much direct translation from the
// generic DAG nodes. The one exception is that the AtomicOrdering is added
// as an operand so that the eventual lowering can make use of it and choose
// acquire/release operations when required.
let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
multiclass AtomicSizes {
def _I8 : PseudoInst<(outs GPR32:$dst),
(ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I16 : PseudoInst<(outs GPR32:$dst),
(ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I32 : PseudoInst<(outs GPR32:$dst),
(ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I64 : PseudoInst<(outs GPR64:$dst),
(ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
}
}
defm ATOMIC_LOAD_ADD : AtomicSizes;
defm ATOMIC_LOAD_SUB : AtomicSizes;
defm ATOMIC_LOAD_AND : AtomicSizes;
defm ATOMIC_LOAD_OR : AtomicSizes;
defm ATOMIC_LOAD_XOR : AtomicSizes;
defm ATOMIC_LOAD_NAND : AtomicSizes;
defm ATOMIC_SWAP : AtomicSizes;
let Defs = [NZCV] in {
// These operations need a CMP to calculate the correct value
defm ATOMIC_LOAD_MIN : AtomicSizes;
defm ATOMIC_LOAD_MAX : AtomicSizes;
defm ATOMIC_LOAD_UMIN : AtomicSizes;
defm ATOMIC_LOAD_UMAX : AtomicSizes;
}
class AtomicCmpSwap<RegisterClass GPRData>
: PseudoInst<(outs GPRData:$dst),
(ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
i32imm:$ordering), []> {
let usesCustomInserter = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
let Defs = [NZCV];
}
def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
//===----------------------------------------------------------------------===//
// Add-subtract (extended register) instructions
//===----------------------------------------------------------------------===//
// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
// The RHS of these operations is conceptually a sign/zero-extended
// register, optionally shifted left by 1-4. The extension can be a
// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64 bits) but
// must be specified with one exception:
// If one of the registers is sp/wsp then LSL is an alias for UXTW in
// 32-bit instructions and UXTX in 64-bit versions, the shift amount
// is not optional in that case (but can explicitly be 0), and the
// entire suffix can be skipped (e.g. "add sp, x3, x2").
multiclass extend_operands<string PREFIX, string Diag> {
def _asmoperand : AsmOperandClass {
let Name = PREFIX;
let RenderMethod = "addRegExtendOperands";
let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
let DiagnosticType = "AddSubRegExtend" # Diag;
}
def _operand : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
let DecoderMethod = "DecodeRegExtendOperand";
let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
}
}
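// A minimal standalone sketch (in C++ rather than TableGen) of the
// extended-register RHS described in the comment above, for the uxtb case:
// the RHS is zero-extended from its low byte and then shifted left by 0-4.
// addUxtb64 is a hypothetical helper.
#include <cstdint>
static inline uint64_t addUxtb64(uint64_t Rn, uint32_t Rm, unsigned Imm3) {
  uint64_t Ext = (uint64_t)(uint8_t)Rm; // uxtb: keep only the low 8 bits
  return Rn + (Ext << Imm3);            // Imm3 is restricted to [0, 4]
}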
defm UXTB : extend_operands<"UXTB", "Small">;
defm UXTH : extend_operands<"UXTH", "Small">;
defm UXTW : extend_operands<"UXTW", "Small">;
defm UXTX : extend_operands<"UXTX", "Large">;
defm SXTB : extend_operands<"SXTB", "Small">;
defm SXTH : extend_operands<"SXTH", "Small">;
defm SXTW : extend_operands<"SXTW", "Small">;
defm SXTX : extend_operands<"SXTX", "Large">;
def LSL_extasmoperand : AsmOperandClass {
let Name = "RegExtendLSL";
let RenderMethod = "addRegExtendOperands";
let DiagnosticType = "AddSubRegExtendLarge";
}
def LSL_extoperand : Operand<i64> {
let ParserMatchClass = LSL_extasmoperand;
}
// The patterns for various sign-extensions are a little ugly and
// non-uniform because everything has already been promoted to the
// legal i64 and i32 types. We'll wrap the various variants up in a
// class for use later.
class extend_types {
dag uxtb; dag uxth; dag uxtw; dag uxtx;
dag sxtb; dag sxth; dag sxtw; dag sxtx;
ValueType ty;
RegisterClass GPR;
}
def extends_to_i64 : extend_types {
let uxtb = (and (anyext i32:$Rm), 255);
let uxth = (and (anyext i32:$Rm), 65535);
let uxtw = (zext i32:$Rm);
let uxtx = (i64 $Rm);
let sxtb = (sext_inreg (anyext i32:$Rm), i8);
let sxth = (sext_inreg (anyext i32:$Rm), i16);
let sxtw = (sext i32:$Rm);
let sxtx = (i64 $Rm);
let ty = i64;
let GPR = GPR64xsp;
}
def extends_to_i32 : extend_types {
let uxtb = (and i32:$Rm, 255);
let uxth = (and i32:$Rm, 65535);
let uxtw = (i32 i32:$Rm);
let uxtx = (i32 i32:$Rm);
let sxtb = (sext_inreg i32:$Rm, i8);
let sxth = (sext_inreg i32:$Rm, i16);
let sxtw = (i32 i32:$Rm);
let sxtx = (i32 i32:$Rm);
let ty = i32;
let GPR = GPR32wsp;
}
// Now, six of the extensions supported are easy and uniform: if the source size
// is 32 bits or less, then Rm is always a 32-bit register. We'll instantiate
// those instructions in one block.
// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
// be impossible.
// + Patterns are very different as well.
// + Passing different registers would be ugly (more fields in extend_types
// would probably be the best option).
multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
SDPatternOperator opfrag,
dag outs, extend_types exts> {
def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
NoItinerary>;
def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
NoItinerary>;
def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
NoItinerary>;
def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
NoItinerary>;
def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
NoItinerary>;
def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
NoItinerary>;
}
// These two could be merged in with the above, but their patterns aren't
// really necessary and the naming scheme would break:
multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
dag outs> {
def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
outs,
(ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
NoItinerary>;
def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
outs,
(ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[/* No Pattern: same as uxtx */],
NoItinerary>;
}
multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
outs,
(ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[/* No pattern: probably same as uxtw */],
NoItinerary>;
def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
outs,
(ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
!strconcat(asmop, "$Rn, $Rm, $Imm3"),
[/* No Pattern: probably same as uxtw */],
NoItinerary>;
}
class SetRD<RegisterClass RC, SDPatternOperator op>
: PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
class SetNZCV<SDPatternOperator op>
: PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
(outs GPR64xsp:$Rd), extends_to_i64>,
addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
(outs GPR64xsp:$Rd)>;
defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
(outs GPR32wsp:$Rd), extends_to_i32>,
addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
(outs GPR32wsp:$Rd)>;
defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
(outs GPR64xsp:$Rd), extends_to_i64>,
addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
(outs GPR64xsp:$Rd)>;
defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
(outs GPR32wsp:$Rd), extends_to_i32>,
addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
(outs GPR32wsp:$Rd)>;
let Defs = [NZCV] in {
defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
(outs GPR64:$Rd), extends_to_i64>,
addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
(outs GPR64:$Rd)>;
defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
(outs GPR32:$Rd), extends_to_i32>,
addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
(outs GPR32:$Rd)>;
defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
(outs GPR64:$Rd), extends_to_i64>,
addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
(outs GPR64:$Rd)>;
defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
(outs GPR32:$Rd), extends_to_i32>,
addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
(outs GPR32:$Rd)>;
let Rd = 0b11111, isCompare = 1 in {
defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
(outs), extends_to_i64>,
addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
(outs), extends_to_i32>,
addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
(outs), extends_to_i64>,
addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
(outs), extends_to_i32>,
addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
}
}
// Now the patterns for when no shift is needed. No patterns are created for
// uxtx/sxtx since they're non-uniform and it's expected that add/sub (shifted
// register) will handle those cases anyway.
multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
extend_types exts> {
def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
(!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
(!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
(!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
(!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
(!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
(!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
}
defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
// operation. Also permitted in this case is complete omission of the argument,
// which implies "lsl #0".
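// For illustration (a sketch, not an exhaustive list): under this rule both
// "add sp, x0, x1" and "add sp, x0, x1, lsl #3" should map onto ADDxxx_uxtx,
// the former with an implicit shift amount of 0.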
multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
(inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
(inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
}
defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
// Rd cannot be sp for flag-setting variants so only half of the aliases are
// needed.
defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
// CMP unfortunately has to be different because the instruction doesn't have a
// dest register.
multiclass cmp_lsl_aliases<string asmop, Instruction inst,
RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
(inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
(inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
}
defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
//===----------------------------------------------------------------------===//
// Add-subtract (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
// These instructions accept a 12-bit unsigned immediate, optionally shifted
// left by 12 bits. Official assembly format specifies a 12 bit immediate with
// one of "", "LSL #0", "LSL #12" supplementary operands.
// There are surprisingly few ways to make this work with TableGen, so this
// implementation has separate instructions for the "LSL #0" and "LSL #12"
// variants.
// If the MCInst retained a single combined immediate (which could be 0x123000,
// for example) then both components (imm & shift) would have to be delegated to
// a single assembly operand. This would entail a separate operand parser
// (because the LSL would have to live in the same AArch64Operand as the
// immediate to be accessible); assembly parsing is rather complex and
// error-prone C++ code.
//
// By splitting the immediate, we can delegate handling this optional operand to
// an InstAlias. Supporting functions to generate the correct MCInst are still
// required, but these are essentially trivial and parsing can remain generic.
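// As a rough worked example: a combined immediate of 0x123000 is handled as
// imm12 = 0x123 together with "lsl #12", whereas a plain 0x123 is
// imm12 = 0x123 with "lsl #0" (or with the shift operand omitted entirely).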
//
// Rejected plans with rationale:
// ------------------------------
//
// In an ideal world you'd have two first-class immediate operands (in
// InOperandList, specifying imm12 and shift). Unfortunately this is not
// selectable by any means I could discover.
//
// An Instruction with two MCOperands hidden behind a single entry in
// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
// but required more C++ code to handle encoding/decoding. Parsing (the intended
// main beneficiary) ended up equally complex because of the optional nature of
// "LSL #0".
//
// Attempting to circumvent the need for a custom OperandParser above by giving
// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
// should be parsed: there was no way to accommodate an "lsl #12".
let ParserMethod = "ParseImmWithLSLOperand",
RenderMethod = "addImmWithLSLOperands" in {
// Derived PredicateMethod fields are different for each
def addsubimm_lsl0_asmoperand : AsmOperandClass {
let Name = "AddSubImmLSL0";
// If an error is reported against this operand, the instruction could also
// be a register variant.
let DiagnosticType = "AddSubSecondSource";
}
def addsubimm_lsl12_asmoperand : AsmOperandClass {
let Name = "AddSubImmLSL12";
let DiagnosticType = "AddSubSecondSource";
}
}
def shr_12_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
}]>;
def shr_12_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
}]>;
def neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
}]>;
multiclass addsub_imm_operands<ValueType ty> {
let PrintMethod = "printAddSubImmLSL0Operand",
EncoderMethod = "getAddSubImmOpValue",
ParserMatchClass = addsubimm_lsl0_asmoperand in {
def _posimm_lsl0 : Operand<ty>,
ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
def _negimm_lsl0 : Operand<ty>,
ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
neg_XFORM>;
}
let PrintMethod = "printAddSubImmLSL12Operand",
EncoderMethod = "getAddSubImmOpValue",
ParserMatchClass = addsubimm_lsl12_asmoperand in {
def _posimm_lsl12 : Operand<ty>,
ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
shr_12_XFORM>;
def _negimm_lsl12 : Operand<ty>,
ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
shr_12_neg_XFORM>;
}
}
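// Sketching how the XForms above are expected to fire: an IR-level
// (add x, 0x456000) matches _posimm_lsl12 and shr_12_XFORM encodes 0x456,
// while (add x, -5) matches _negimm_lsl0 and neg_XFORM encodes 5, which lets
// the addition be selected as a SUB.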
// The add operands don't need any transformation
defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
string asmop, string cmpasmop,
Operand imm_operand, Operand cmp_imm_operand,
RegisterClass GPR, RegisterClass GPRsp,
AArch64Reg ZR, ValueType Ty> {
// All registers for non-S variants allow SP
def _s : A64I_addsubimm<sf, op, 0b0, shift,
(outs GPRsp:$Rd),
(ins GPRsp:$Rn, imm_operand:$Imm12),
!strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
[(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
NoItinerary>;
// S variants can read SP but would write to ZR
def _S : A64I_addsubimm<sf, op, 0b1, shift,
(outs GPR:$Rd),
(ins GPRsp:$Rn, imm_operand:$Imm12),
!strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
[(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
NoItinerary> {
let Defs = [NZCV];
}
// Note that the pattern here for ADDS is subtle. Canonically CMP
// a, b becomes SUBS a, b. If b < 0 then this is equivalent to
// ADDS a, (-b). This is not true in general.
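// As an illustration of that subtlety: "cmp x0, #-4" can be encoded as
// "cmn x0, #4" because the immediate is known negative, but "cmp x0, #0"
// must not become "cmn x0, #0" (the carry flag would differ).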
def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
(outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
!strconcat(cmpasmop, " $Rn, $Imm12"),
[(set NZCV,
(A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
NoItinerary> {
let Rd = 0b11111;
let Defs = [NZCV];
let isCompare = 1;
}
}
multiclass addsubimm_shifts<string prefix, bit sf, bit op,
string asmop, string cmpasmop, string operand, string cmpoperand,
RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
ValueType Ty> {
defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
asmop, cmpasmop,
!cast<Operand>(operand # "_lsl0"),
!cast<Operand>(cmpoperand # "_lsl0"),
GPR, GPRsp, ZR, Ty>;
defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
asmop, cmpasmop,
!cast<Operand>(operand # "_lsl12"),
!cast<Operand>(cmpoperand # "_lsl12"),
GPR, GPRsp, ZR, Ty>;
}
defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
"addsubimm_operand_i32_posimm",
"addsubimm_operand_i32_negimm",
GPR32, GPR32wsp, WZR, i32>;
defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
"addsubimm_operand_i64_posimm",
"addsubimm_operand_i64_negimm",
GPR64, GPR64xsp, XZR, i64>;
defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
"addsubimm_operand_i32_negimm",
"addsubimm_operand_i32_posimm",
GPR32, GPR32wsp, WZR, i32>;
defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
"addsubimm_operand_i64_negimm",
"addsubimm_operand_i64_posimm",
GPR64, GPR64xsp, XZR, i64>;
multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
def _fromsp : InstAlias<"mov $Rd, $Rn",
(addop GPRsp:$Rd, SP:$Rn, 0),
0b1>;
def _tosp : InstAlias<"mov $Rd, $Rn",
(addop SP:$Rd, GPRsp:$Rn, 0),
0b1>;
}
// Recall Rxsp is a RegisterClass containing *just* xsp.
defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
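// Thus "mov sp, x5" should be accepted and emitted as ADDxxi_lsl0_s
// (i.e. "add sp, x5, #0"), and likewise for the wsp/w variants.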
//===----------------------------------------------------------------------===//
// Add-subtract (shifted register) instructions
//===----------------------------------------------------------------------===//
// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
//===-------------------------------
// 1. The "shifted register" operands. Shared with logical insts.
//===-------------------------------
multiclass shift_operands<string prefix, string form> {
def _asmoperand_i32 : AsmOperandClass {
let Name = "Shift" # form # "i32";
let RenderMethod = "addShiftOperands";
let PredicateMethod = "isShift<A64SE::" # form # ", false>";
let DiagnosticType = "AddSubRegShift32";
}
// Note that the operand type is intentionally i64 because the DAGCombiner
// puts these into a canonical form.
def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
let ParserMatchClass
= !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
let DecoderMethod = "Decode32BitShiftOperand";
}
def _asmoperand_i64 : AsmOperandClass {
let Name = "Shift" # form # "i64";
let RenderMethod = "addShiftOperands";
let PredicateMethod = "isShift<A64SE::" # form # ", true>";
let DiagnosticType = "AddSubRegShift64";
}
def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
let ParserMatchClass
= !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
}
}
defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
defm asr_operand : shift_operands<"asr_operand", "ASR">;
// Not used for add/sub, but defined here for completeness. The "logical
// (shifted register)" instructions *do* have an ROR variant.
defm ror_operand : shift_operands<"ror_operand", "ROR">;
//===-------------------------------
// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
//===-------------------------------
// N.b. the commutable parameter is just !N. It will be first against the wall
// when the revolution comes.
multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
string asmop, SDPatternOperator opfrag, ValueType ty,
RegisterClass GPR, list<Register> defs> {
let isCommutable = commutable, Defs = defs in {
def _lsl : A64I_addsubshift<sf, op, s, 0b00,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _lsr : A64I_addsubshift<sf, op, s, 0b01,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _asr : A64I_addsubshift<sf, op, s, 0b10,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6))
)],
NoItinerary>;
}
def _noshift
: InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
(!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
GPR:$Rm, 0)>;
def : Pat<(opfrag ty:$Rn, ty:$Rm),
(!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
}
multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
string asmop, SDPatternOperator opfrag,
list<Register> defs> {
defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
commutable, asmop, opfrag, i64, GPR64, defs>;
defm www : addsub_shifts<prefix # "www", 0b0, op, s,
commutable, asmop, opfrag, i32, GPR32, defs>;
}
defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
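// To sketch the effect: (add i64:$a, (shl i64:$b, 3)) should select ADDxxx_lsl
// with Imm6 = 3, while a plain (add i64:$a, i64:$b) is caught by the extra Pat
// and also ends up as ADDxxx_lsl, with Imm6 = 0.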
//===-------------------------------
// 3. The NEG/NEGS aliases
//===-------------------------------
multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
ValueType ty, Operand shift_operand, SDNode shiftop> {
def : InstAlias<"neg $Rd, $Rm, $Imm6",
(INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
(INST ZR, $Rm, shift_operand:$Imm6)>;
}
defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
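// E.g. (sub 0, (shl i32:$x, 3)) should become "neg w0, w1, lsl #3", i.e.
// SUBwww_lsl with WZR as the first source.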
// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
// be involved.
class negs_alias<Instruction INST, RegisterClass GPR,
Register ZR, Operand shift_operand, SDNode shiftop>
: InstAlias<"negs $Rd, $Rm, $Imm6",
(INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
//===-------------------------------
// 4. The CMP/CMN aliases
//===-------------------------------
multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
string asmop, SDPatternOperator opfrag, ValueType ty,
RegisterClass GPR> {
let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
[(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
[(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
[(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6))
)],
NoItinerary>;
}
def _noshift
: InstAlias<!strconcat(asmop, " $Rn, $Rm"),
(!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
def : Pat<(opfrag ty:$Rn, ty:$Rm),
(!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
}
defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
//===----------------------------------------------------------------------===//
// Add-subtract (with carry) instructions
//===----------------------------------------------------------------------===//
// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
let Uses = [NZCV] in {
def www : A64I_addsubcarry<0b0, op, s, 0b000000,
(outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
!strconcat(asmop, "\t$Rd, $Rn, $Rm"),
[], NoItinerary>;
def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
!strconcat(asmop, "\t$Rd, $Rn, $Rm"),
[], NoItinerary>;
}
}
let isCommutable = 1 in {
defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
}
defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
let Defs = [NZCV] in {
let isCommutable = 1 in {
defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
}
defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
}
def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
// Note that adde and sube can form chains longer than two instructions (e.g.
// for 256-bit addition), so the flag-setting instructions are appropriate.
def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
//===----------------------------------------------------------------------===//
// Bitfield
//===----------------------------------------------------------------------===//
// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
// UBFIZ, UBFX
// Because of the rather complicated nearly-overlapping aliases, the decoding of
// this range of instructions is handled manually. The architectural
// instructions are BFM, SBFM and UBFM but a disassembler should never produce
// these.
//
// In the end, the best option was to use BFM instructions for decoding under
// almost all circumstances, but to create aliasing *Instructions* for each of
// the canonical forms and specify a completely custom decoder which would
// substitute the correct MCInst as needed.
//
// This also simplifies instruction selection, parsing etc because the MCInsts
// have a shape that's closer to their use in code.
//===-------------------------------
// 1. The architectural BFM instructions
//===-------------------------------
def uimm5_asmoperand : AsmOperandClass {
let Name = "UImm5";
let PredicateMethod = "isUImm<5>";
let RenderMethod = "addImmOperands";
let DiagnosticType = "UImm5";
}
def uimm6_asmoperand : AsmOperandClass {
let Name = "UImm6";
let PredicateMethod = "isUImm<6>";
let RenderMethod = "addImmOperands";
let DiagnosticType = "UImm6";
}
def bitfield32_imm : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
let ParserMatchClass = uimm5_asmoperand;
let DecoderMethod = "DecodeBitfield32ImmOperand";
}
def bitfield64_imm : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = uimm6_asmoperand;
// Default decoder works in 64-bit case: the 6-bit field can take any value.
}
multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
(ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
[], NoItinerary> {
let DecoderMethod = "DecodeBitfieldInstruction";
}
def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
(ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
[], NoItinerary> {
let DecoderMethod = "DecodeBitfieldInstruction";
}
}
defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
// BFM instructions modify the destination register rather than defining it
// completely.
def BFMwwii :
A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
(ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
"bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
let DecoderMethod = "DecodeBitfieldInstruction";
let Constraints = "$src = $Rd";
}
def BFMxxii :
A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
(ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
"bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
let DecoderMethod = "DecodeBitfieldInstruction";
let Constraints = "$src = $Rd";
}
//===-------------------------------
// 2. Extend aliases to 64-bit dest
//===-------------------------------
// Unfortunately the extensions that end up as 64-bits cannot be handled by an
// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
// to be mapped to "SBFM x0, x0, #0, #7" (changing the class of Rn). InstAlias
// is not capable of such a mapping as far as I'm aware.
// Note that these instructions are strictly more specific than the
// BFM ones (in ImmR) so they can handle their own decoding.
class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
string asmop, bits<6> imms, dag pattern>
: A64I_bitfield<sf, opc, sf,
(outs GPRDest:$Rd), (ins GPR32:$Rn),
!strconcat(asmop, "\t$Rd, $Rn"),
[(set dty:$Rd, pattern)], NoItinerary> {
let ImmR = 0b000000;
let ImmS = imms;
}
// Signed extensions
def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
(sext_inreg (anyext i32:$Rn), i8)>;
def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
(sext_inreg i32:$Rn, i8)>;
def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
(sext_inreg (anyext i32:$Rn), i16)>;
def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
(sext_inreg i32:$Rn, i16)>;
def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
// Unsigned extensions
def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
(and i32:$Rn, 255)>;
def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
(and i32:$Rn, 65535)>;
// The 64-bit unsigned variants are not strictly architectural but recommended
// for consistency.
let isAsmParserOnly = 1 in {
def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
(and (anyext i32:$Rn), 255)>;
def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
(and (anyext i32:$Rn), 65535)>;
}
// Extra patterns for when the source register is actually 64-bits
// too. There's no architectural difference here, it's just LLVM
// shenanigans. There's no need for equivalent zero-extension patterns
// because they'll already be caught by logical (immediate) matching.
def : Pat<(sext_inreg i64:$Rn, i8),
(SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
def : Pat<(sext_inreg i64:$Rn, i16),
(SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
def : Pat<(sext_inreg i64:$Rn, i32),
(SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
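// E.g. (sext_inreg i64:$x, i8) is selected by first taking the 32-bit
// sub-register and then applying SXTBxw, roughly "sxtb x0, w0".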
//===-------------------------------
// 3. Aliases for ASR and LSR (the simple shifts)
//===-------------------------------
// These also handle their own decoding because ImmS being set makes
// them take precedence over BFM.
multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
def wwi : A64I_bitfield<0b0, opc, 0b0,
(outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
[(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
NoItinerary> {
let ImmS = 31;
}
def xxi : A64I_bitfield<0b1, opc, 0b1,
(outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
[(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
NoItinerary> {
let ImmS = 63;
}
}
defm ASR : A64I_shift<0b00, "asr", sra>;
defm LSR : A64I_shift<0b10, "lsr", srl>;
//===-------------------------------
// 4. Aliases for LSL
//===-------------------------------
// Unfortunately LSL and subsequent aliases are much more complicated. We need
// to be able to say that certain output instruction fields depend in a complex
// manner on combinations of input assembly fields.
//
// MIOperandInfo *might* have been able to do it, but at the cost of
// significantly more C++ code.
// N.b. contrary to usual practice these operands store the shift rather than
// the machine bits in an MCInst. The complexity overhead of consistency
// outweighed the benefits in this case (custom asmparser, printer and selection
// vs custom encoder).
def bitfield32_lsl_imm : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
let ParserMatchClass = uimm5_asmoperand;
let EncoderMethod = "getBitfield32LSLOpValue";
}
def bitfield64_lsl_imm : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
let ParserMatchClass = uimm6_asmoperand;
let EncoderMethod = "getBitfield64LSLOpValue";
}
class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
Operand operand>
: A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
"lsl\t$Rd, $Rn, $FullImm",
[(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
NoItinerary> {
bits<12> FullImm;
let ImmR = FullImm{5-0};
let ImmS = FullImm{11-6};
// No disassembler allowed because it would overlap with BFM which does the
// actual work.
let isAsmParserOnly = 1;
}
def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
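// To sketch the encoding these rely on (the usual LSL-as-UBFM equivalence):
// "lsl x0, x1, #3" stores the shift, 3, in the MCInst operand, and
// getBitfield64LSLOpValue is expected to produce ImmR = (64 - 3) % 64 = 61
// and ImmS = 63 - 3 = 60.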
//===-------------------------------
// 5. Aliases for bitfield extract instructions
//===-------------------------------
def bfx32_width_asmoperand : AsmOperandClass {
let Name = "BFX32Width";
let PredicateMethod = "isBitfieldWidth<32>";
let RenderMethod = "addBFXWidthOperands";
let DiagnosticType = "Width32";
}
def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
let PrintMethod = "printBFXWidthOperand";
let ParserMatchClass = bfx32_width_asmoperand;
}
def bfx64_width_asmoperand : AsmOperandClass {
let Name = "BFX64Width";
let PredicateMethod = "isBitfieldWidth<64>";
let RenderMethod = "addBFXWidthOperands";
let DiagnosticType = "Width64";
}
def bfx64_width : Operand<i64> {
let PrintMethod = "printBFXWidthOperand";
let ParserMatchClass = bfx64_width_asmoperand;
}
multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
(ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
[(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
}
def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
(ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
[(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
}
}
defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
// Again, variants based on BFM modify Rd so need it as an input too.
def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
(ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
"bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
let Constraints = "$src = $Rd";
}
def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
(ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
"bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
let Constraints = "$src = $Rd";
}
// SBFX instructions can do a 1-instruction sign-extension of boolean values.
def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
(SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
sub_32)>;
//===-------------------------------
// 6. Aliases for bitfield insert instructions
//===-------------------------------
def bfi32_lsb_asmoperand : AsmOperandClass {
let Name = "BFI32LSB";
let PredicateMethod = "isUImm<5>";
let RenderMethod = "addBFILSBOperands<32>";
let DiagnosticType = "UImm5";
}
def bfi32_lsb : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
let PrintMethod = "printBFILSBOperand<32>";
let ParserMatchClass = bfi32_lsb_asmoperand;
}
def bfi64_lsb_asmoperand : AsmOperandClass {
let Name = "BFI64LSB";
let PredicateMethod = "isUImm<6>";
let RenderMethod = "addBFILSBOperands<64>";
let DiagnosticType = "UImm6";
}
def bfi64_lsb : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
let PrintMethod = "printBFILSBOperand<64>";
let ParserMatchClass = bfi64_lsb_asmoperand;
}
// Width verification is performed during conversion, so the width operand can
// be shared between the 32/64-bit cases. A custom print method is still
// needed, though, because ImmS encodes "width - 1".
def bfi32_width_asmoperand : AsmOperandClass {
let Name = "BFI32Width";
let PredicateMethod = "isBitfieldWidth<32>";
let RenderMethod = "addBFIWidthOperands";
let DiagnosticType = "Width32";
}
def bfi32_width : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
let PrintMethod = "printBFIWidthOperand";
let ParserMatchClass = bfi32_width_asmoperand;
}
def bfi64_width_asmoperand : AsmOperandClass {
let Name = "BFI64Width";
let PredicateMethod = "isBitfieldWidth<64>";
let RenderMethod = "addBFIWidthOperands";
let DiagnosticType = "Width64";
}
def bfi64_width : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
let PrintMethod = "printBFIWidthOperand";
let ParserMatchClass = bfi64_width_asmoperand;
}
multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
(ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
[], NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
}
def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
(ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
[], NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
}
}
defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
(ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
"bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
let Constraints = "$src = $Rd";
}
def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
(ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
"bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
// As above, no disassembler allowed.
let isAsmParserOnly = 1;
let Constraints = "$src = $Rd";
}
//===----------------------------------------------------------------------===//
// Compare and branch (immediate)
//===----------------------------------------------------------------------===//
// Contains: CBZ, CBNZ
class label_asmoperand<int width, int scale> : AsmOperandClass {
let Name = "Label" # width # "_" # scale;
let PredicateMethod = "isLabel<" # width # "," # scale # ">";
let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
let DiagnosticType = "Label";
}
def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
// All conditional immediate branches are the same really: 19 signed bits scaled
// by the instruction-size (4).
def bcc_target : Operand<OtherVT> {
// This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
let ParserMatchClass = label_wid19_scal4_asmoperand;
let PrintMethod = "printLabelOperand<19, 4>";
let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
let OperandType = "OPERAND_PCREL";
}
multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
let isBranch = 1, isTerminator = 1 in {
def x : A64I_cmpbr<0b1, op,
(outs),
(ins GPR64:$Rt, bcc_target:$Label),
!strconcat(asmop,"\t$Rt, $Label"),
[(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
NoItinerary>;
def w : A64I_cmpbr<0b0, op,
(outs),
(ins GPR32:$Rt, bcc_target:$Label),
!strconcat(asmop,"\t$Rt, $Label"),
[(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
NoItinerary>;
}
}
defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
return Imm == A64CC::EQ;
}]> >;
defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
return Imm == A64CC::NE;
}]> >;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: B.cc
def cond_code_asmoperand : AsmOperandClass {
let Name = "CondCode";
let DiagnosticType = "CondCode";
}
def cond_code : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 15;
}]> {
let PrintMethod = "printCondCodeOperand";
let ParserMatchClass = cond_code_asmoperand;
}
def Bcc : A64I_condbr<0b0, 0b0, (outs),
(ins cond_code:$Cond, bcc_target:$Label),
"b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
NoItinerary> {
let Uses = [NZCV];
let isBranch = 1;
let isTerminator = 1;
}
//===----------------------------------------------------------------------===//
// Conditional compare (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: CCMN, CCMP
def uimm4_asmoperand : AsmOperandClass {
let Name = "UImm4";
let PredicateMethod = "isUImm<4>";
let RenderMethod = "addImmOperands";
let DiagnosticType = "UImm4";
}
def uimm4 : Operand<i32> {
let ParserMatchClass = uimm4_asmoperand;
}
def uimm5 : Operand<i32> {
let ParserMatchClass = uimm5_asmoperand;
}
// The only difference between this operand and the one for instructions like
// B.cc is that it's parsed manually. The others get parsed implicitly as part
// of the mnemonic handling.
def cond_code_op_asmoperand : AsmOperandClass {
let Name = "CondCodeOp";
let RenderMethod = "addCondCodeOperands";
let PredicateMethod = "isCondCode";
let ParserMethod = "ParseCondCodeOperand";
let DiagnosticType = "CondCode";
}
def cond_code_op : Operand<i32> {
let PrintMethod = "printCondCodeOperand";
let ParserMatchClass = cond_code_op_asmoperand;
}
class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
: A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
(ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
!strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
[], NoItinerary> {
let Defs = [NZCV];
}
def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
//===----------------------------------------------------------------------===//
// Conditional compare (register) instructions
//===----------------------------------------------------------------------===//
// Contains: CCMN, CCMP
class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
: A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
(outs),
(ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
!strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
[], NoItinerary> {
let Defs = [NZCV];
}
def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
//===----------------------------------------------------------------------===//
// Conditional select instructions
//===----------------------------------------------------------------------===//
// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
// A condition code which is encoded in the instruction as its semantic
// (rather than bitwise) inversion.
def inv_cond_code_op_asmoperand : AsmOperandClass {
let Name = "InvCondCodeOp";
let RenderMethod = "addInvCondCodeOperands";
let PredicateMethod = "isCondCode";
let ParserMethod = "ParseCondCodeOperand";
let DiagnosticType = "CondCode";
}
def inv_cond_code_op : Operand<i32> {
let ParserMatchClass = inv_cond_code_op_asmoperand;
}
// Having a separate operand for the selectable use-case is debatable, but gives
// consistency with cond_code.
def inv_cond_XFORM : SDNodeXForm<imm, [{
A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
}]>;
def inv_cond_code
: ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
SDPatternOperator select> {
let Uses = [NZCV] in {
def wwwc : A64I_condsel<0b0, op, 0b0, op2,
(outs GPR32:$Rd),
(ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
[(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
NoItinerary>;
def xxxc : A64I_condsel<0b1, op, 0b0, op2,
(outs GPR64:$Rd),
(ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
[(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
NoItinerary>;
}
}
def simple_select
: PatFrag<(ops node:$lhs, node:$rhs),
(A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
class complex_select<SDPatternOperator opnode>
: PatFrag<(ops node:$lhs, node:$rhs),
(A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
complex_select<PatFrag<(ops node:$val),
(add node:$val, 1)>>>;
defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
// Now the instruction aliases, which fit nicely into LLVM's model:
def : InstAlias<"cset $Rd, $Cond",
(CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"cset $Rd, $Cond",
(CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"csetm $Rd, $Cond",
(CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"csetm $Rd, $Cond",
(CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinc $Rd, $Rn, $Cond",
(CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinc $Rd, $Rn, $Cond",
(CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinv $Rd, $Rn, $Cond",
(CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinv $Rd, $Rn, $Cond",
(CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cneg $Rd, $Rn, $Cond",
(CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cneg $Rd, $Rn, $Cond",
(CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
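// Reading one of these concretely: "cset w0, eq" should assemble to
// "csinc w0, wzr, wzr, ne" -- the parsed condition is the semantic inverse of
// the one encoded in the instruction.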
// Finally some helper patterns.
// For CSET (a.k.a. zero-extension of icmp)
def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
(CSINCwwwc WZR, WZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
(CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
(CSINCxxxc XZR, XZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
(CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
// For CSETM (a.k.a. sign-extension of icmp)
def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
(CSINVwwwc WZR, WZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
(CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
(CSINVxxxc XZR, XZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
(CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
// commutativity. The instructions are too complex for isCommutable to be used,
// so we have to create the patterns manually:
// No commutable pattern for CSEL since the commuted version is isomorphic.
// CSINC
def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
(CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
(CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
// CSINV
def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
(CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
(CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
// CSNEG
def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
(CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
(CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
//===----------------------------------------------------------------------===//
// Data Processing (1 source) instructions
//===----------------------------------------------------------------------===//
// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
// We define a unary operator which always fails. We will use this to
// define unary operators that cannot be matched.
class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
list<dag> patterns, RegisterClass GPRrc,
InstrItinClass itin>:
A64I_dp_1src<sf,
0,
0b00000,
opcode,
!strconcat(asmop, "\t$Rd, $Rn"),
(outs GPRrc:$Rd),
(ins GPRrc:$Rn),
patterns,
itin>;
multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
let hasSideEffects = 0 in {
def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
}
}
defm RBIT : A64I_dp_1src<0b000000, "rbit">;
defm CLS : A64I_dp_1src<0b000101, "cls">;
defm CLZ : A64I_dp_1src<0b000100, "clz">;
def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
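// (i.e. cttz is implemented as rbit followed by clz, since AArch64 has no
// count-trailing-zeros instruction of its own.)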
def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
[(set i32:$Rd, (bswap i32:$Rn))],
GPR32, NoItinerary>;
def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
[(set i64:$Rd, (bswap i64:$Rn))],
GPR64, NoItinerary>;
def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
[(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
GPR64, NoItinerary>;
def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
[(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
GPR32,
NoItinerary>;
def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
//===----------------------------------------------------------------------===//
// Data Processing (2 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
// LSR, ASR, ROR
class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
RegisterClass GPRsp,
InstrItinClass itin>:
A64I_dp_2src<sf,
opcode,
0,
!strconcat(asmop, "\t$Rd, $Rn, $Rm"),
(outs GPRsp:$Rd),
(ins GPRsp:$Rn, GPRsp:$Rm),
patterns,
itin>;
multiclass dp_2src_crc<bit c, string asmop> {
def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
!strconcat(asmop, "b"), [], GPR32, NoItinerary>;
def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
!strconcat(asmop, "h"), [], GPR32, NoItinerary>;
def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
!strconcat(asmop, "w"), [], GPR32, NoItinerary>;
def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
!strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
(outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
NoItinerary>;
}
multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
def www : dp_2src_impl<0b0,
opcode,
asmop,
[(set i32:$Rd,
(op i32:$Rn, (i64 (zext i32:$Rm))))],
GPR32,
NoItinerary>;
def xxx : dp_2src_impl<0b1,
opcode,
asmop,
[(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
GPR64,
NoItinerary>;
}
multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
def www : dp_2src_impl<0b0,
opcode,
asmop,
[(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
GPR32,
NoItinerary>;
def xxx : dp_2src_impl<0b1,
opcode,
asmop,
[(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
GPR64,
NoItinerary>;
}
// Here we define the data processing 2 source instructions.
defm CRC32 : dp_2src_crc<0b0, "crc32">;
defm CRC32C : dp_2src_crc<0b1, "crc32c">;
defm UDIV : dp_2src<0b000010, "udiv", udiv>;
defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
// Extra patterns for an incoming 64-bit value for a 32-bit
// operation. Since the LLVM operations are undefined (as in C) if the
// RHS is out of range, it's perfectly permissible to discard the high
// bits of the GPR64.
def : Pat<(shl i32:$Rn, i64:$Rm),
(LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
def : Pat<(srl i32:$Rn, i64:$Rm),
(LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
def : Pat<(sra i32:$Rn, i64:$Rm),
(ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
def : Pat<(rotr i32:$Rn, i64:$Rm),
(RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
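// (The hardware behaves compatibly anyway: the variable shift instructions
// only consume the shift amount modulo the data size.)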
// Here we define the aliases for the data processing 2 source instructions.
def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
//===----------------------------------------------------------------------===//
// Data Processing (3 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
ValueType AccTy, RegisterClass SrcReg,
string asmop, dag pattern>
: A64I_dp3<sf, opcode,
(outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
[(set AccTy:$Rd, pattern)], NoItinerary> {
RegisterClass AccGPR = AccReg;
RegisterClass SrcGPR = SrcReg;
}
def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
(add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
(add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
(sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
(sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
(add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
(sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
(add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
(sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
(ins GPR64:$Rn, GPR64:$Rm),
"umulh\t$Rd, $Rn, $Rm",
[(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
NoItinerary>;
def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
(ins GPR64:$Rn, GPR64:$Rm),
"smulh\t$Rd, $Rn, $Rm",
[(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
NoItinerary>;
}
multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
Register ZR, dag pattern> {
def : InstAlias<asmop # " $Rd, $Rn, $Rm",
(INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
}
defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
(sub 0, (mul i32:$Rn, i32:$Rm))>;
defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
(sub 0, (mul i64:$Rn, i64:$Rm))>;
defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
(mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
(sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
(mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
(sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
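// So, for example, "mul x0, x1, x2" is accepted and encoded as
// "madd x0, x1, x2, xzr", and (mul i64:$a, i64:$b) selects the same MADD with
// XZR as the accumulator.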
//===----------------------------------------------------------------------===//
// Exception generation
//===----------------------------------------------------------------------===//
// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
def uimm16_asmoperand : AsmOperandClass {
let Name = "UImm16";
let PredicateMethod = "isUImm<16>";
let RenderMethod = "addImmOperands";
let DiagnosticType = "UImm16";
}
def uimm16 : Operand<i32> {
let ParserMatchClass = uimm16_asmoperand;
}
class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
: A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
!strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
let isBranch = 1;
let isTerminator = 1;
}
def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
// The immediate is optional for the DCPS instructions, defaulting to 0.
def : InstAlias<"dcps1", (DCPS1i 0)>;
def : InstAlias<"dcps2", (DCPS2i 0)>;
def : InstAlias<"dcps3", (DCPS3i 0)>;
//===----------------------------------------------------------------------===//
// Extract (immediate)
//===----------------------------------------------------------------------===//
// Contains: EXTR + alias ROR
def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
(outs GPR32:$Rd),
(ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
"extr\t$Rd, $Rn, $Rm, $LSB",
[(set i32:$Rd,
(A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
NoItinerary>;
def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
(outs GPR64:$Rd),
(ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
"extr\t$Rd, $Rn, $Rm, $LSB",
[(set i64:$Rd,
(A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
NoItinerary>;
def : InstAlias<"ror $Rd, $Rs, $LSB",
(EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
def : InstAlias<"ror $Rd, $Rs, $LSB",
(EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
(EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
(EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
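// E.g. (rotr i32:$x, 13) should become "extr w0, w1, w1, #13": a rotate is
// just an extract with both sources equal.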
//===----------------------------------------------------------------------===//
// Floating-point compare instructions
//===----------------------------------------------------------------------===//
// Contains: FCMP, FCMPE
def fpzero_asmoperand : AsmOperandClass {
let Name = "FPZero";
let ParserMethod = "ParseFPImmOperand";
let DiagnosticType = "FPZero";
}
def fpz32 : Operand<f32>,
ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
let DecoderMethod = "DecodeFPZeroOperand";
}
def fpz64 : Operand<f64>,
ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
let DecoderMethod = "DecodeFPZeroOperand";
}
def fpz64movi : Operand<i64>,
ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
let DecoderMethod = "DecodeFPZeroOperand";
}
multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
(outs), ins, "fcmp\t$Rn, $Rm", [pattern],
NoItinerary> {
let Defs = [NZCV];
}
def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
(outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
let Defs = [NZCV];
}
}
defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
(set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
(set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
// What would be Rm should be written as 0; note that even though it's called
// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
(set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
(set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
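// Note the only "immediate" the architecture permits here is +0.0, so e.g.
// "fcmp s0, #0.0" uses FCMPsi while any other constant needs a register.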
//===----------------------------------------------------------------------===//
// Floating-point conditional compare instructions
//===----------------------------------------------------------------------===//
// Contains: FCCMP, FCCMPE
class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
: A64I_fpccmp<0b0, 0b0, type, op,
(outs),
(ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
!strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
[], NoItinerary> {
let Defs = [NZCV];
}
def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
//===----------------------------------------------------------------------===//
// Floating-point conditional select instructions
//===----------------------------------------------------------------------===//
// Contains: FCSEL
let Uses = [NZCV] in {
def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
(ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
"fcsel\t$Rd, $Rn, $Rm, $Cond",
[(set f32:$Rd,
(simple_select f32:$Rn, f32:$Rm))],
NoItinerary>;
def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
(ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
"fcsel\t$Rd, $Rn, $Rm, $Cond",
[(set f64:$Rd,
(simple_select f64:$Rn, f64:$Rm))],
NoItinerary>;
}
//===----------------------------------------------------------------------===//
// Floating-point data-processing (1 source)
//===----------------------------------------------------------------------===//
// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
[{ (void)N; return false; }]>;
// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
// syntax. Default to no pattern because most are odd enough not to have one.
multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
SDPatternOperator opnode = FPNoUnop> {
def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
!strconcat(asmstr, "\t$Rd, $Rn"),
[(set f32:$Rd, (opnode f32:$Rn))],
NoItinerary>;
def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
!strconcat(asmstr, "\t$Rd, $Rn"),
[(set f64:$Rd, (opnode f64:$Rn))],
NoItinerary>;
}
defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
// The FCVT instructions have different source and destination register-types,
// but the fields are uniform everywhere a D-register (say) crops up. Package
// this information in a Record.
class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
RegisterClass Class = rc;
ValueType VT = vt;
bit t1 = fld{1};
bit t0 = fld{0};
}
def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
: A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
{0,0,0,1, DestReg.t1, DestReg.t0},
(outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
"fcvt\t$Rd, $Rn",
[(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>;
def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
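// Restating the defs above by example: FCVTds widens, so "fcvt d0, s1"
// comes from (fextend f32), while FCVThd narrows and "fcvt h0, d1" is the
// f64-to-f16 fround case.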
//===----------------------------------------------------------------------===//
// Floating-point data-processing (2 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
[{ (void)N; return false; }]>;
multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
SDPatternOperator opnode> {
def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
(outs FPR32:$Rd),
(ins FPR32:$Rn, FPR32:$Rm),
!strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
[(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
NoItinerary>;
def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
(outs FPR64:$Rd),
(ins FPR64:$Rn, FPR64:$Rm),
!strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
[(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
NoItinerary>;
}
let isCommutable = 1 in {
defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
// No patterns for these.
defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
PatFrag<(ops node:$lhs, node:$rhs),
(fneg (fmul node:$lhs, node:$rhs))> >;
}
defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
//===----------------------------------------------------------------------===//
// Floating-point data-processing (3 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: FMADD, FMSUB, FNMADD, FNMSUB
def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
(fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
(fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
(fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
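// Spelling out the algebra of these fragments as written (a reading of the
// PatFrags above, not a claim about the architectural definitions):
//   fmsub:  fma(-Rn, Rm, Ra)  == Ra - Rn*Rm
//   fnmadd: fma(Rn, Rm, -Ra)  == Rn*Rm - Ra
//   fnmsub: fma(-Rn, Rm, -Ra) == -(Rn*Rm) - Ra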
class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
: A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
(ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
!strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
[(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
NoItinerary>;
def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
// Extra patterns for when we're allowed to fuse a separate multiplication and
// addition into a single FMA-family instruction.
let Predicates = [HasFPARMv8, UseFusedMAC] in {
def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
}
//===----------------------------------------------------------------------===//
// Floating-point <-> fixed-point conversion instructions
//===----------------------------------------------------------------------===//
// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
// #1-#32 allowed, encoded as "64 - <specified imm>".
def fixedpos_asmoperand_i32 : AsmOperandClass {
let Name = "CVTFixedPos32";
let RenderMethod = "addCVTFixedPosOperands";
let PredicateMethod = "isCVTFixedPos<32>";
let DiagnosticType = "CVTFixedPos32";
}
// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
def fixedpos_asmoperand_i64 : AsmOperandClass {
let Name = "CVTFixedPos64";
let RenderMethod = "addCVTFixedPosOperands";
let PredicateMethod = "isCVTFixedPos<64>";
let DiagnosticType = "CVTFixedPos64";
}
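// Worked encoding example (our illustration of the "64 - <specified imm>"
// rule above, not taken from the ARM ARM): a scale of #3 is stored as
// 64 - 3 = 61 in the Scale field, presumably materialised by the
// addCVTFixedPosOperands render method named in both classes.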
// We need the Cartesian product of f32/f64 and i32/i64 operands for
// conversions:
// + Selection needs to use operands of correct floating type
// + Assembly parsing and decoding depend on integer width
class cvtfix_i32_op<ValueType FloatVT>
: Operand<FloatVT>,
ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
let ParserMatchClass = fixedpos_asmoperand_i32;
let DecoderMethod = "DecodeCVT32FixedPosOperand";
let PrintMethod = "printCVTFixedPosOperand";
}
class cvtfix_i64_op<ValueType FloatVT>
: Operand<FloatVT>,
ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
let ParserMatchClass = fixedpos_asmoperand_i64;
let PrintMethod = "printCVTFixedPosOperand";
}
// Because of the proliferation of weird operands, it's not really
// worth going for a multiclass here. Oh well.
class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
RegisterClass GPR, RegisterClass FPR,
ValueType DstTy, ValueType SrcTy,
Operand scale_op, string asmop, SDNode cvtop>
: A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
(outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
!strconcat(asmop, "\t$Rd, $Rn, $Scale"),
[(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
NoItinerary>;
def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
RegisterClass FPR, RegisterClass GPR,
ValueType DstTy, ValueType SrcTy,
Operand scale_op, string asmop, SDNode cvtop>
: A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
(outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
!strconcat(asmop, "\t$Rd, $Rn, $Scale"),
[(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
NoItinerary>;
def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
//===----------------------------------------------------------------------===//
// Floating-point <-> integer conversion instructions
//===----------------------------------------------------------------------===//
// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
RegisterClass DestPR, RegisterClass SrcPR, string asmop>
: A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
!strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
GPR32, FPR32, asmop # "s">;
def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
GPR64, FPR32, asmop # "s">;
def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
GPR32, FPR32, asmop # "u">;
def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
GPR64, FPR32, asmop # "u">;
def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
GPR32, FPR64, asmop # "s">;
def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
GPR64, FPR64, asmop # "s">;
def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
GPR32, FPR64, asmop # "u">;
def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
GPR64, FPR64, asmop # "u">;
}
defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
let Predicates = [HasFPARMv8] in {
def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
}
multiclass A64I_inttofp<bit o0, string asmop> {
def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
}
defm S : A64I_inttofp<0b0, "scvtf">;
defm U : A64I_inttofp<0b1, "ucvtf">;
let Predicates = [HasFPARMv8] in {
def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
}
def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
let Predicates = [HasFPARMv8] in {
def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
}
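// As the Pats above make explicit, these four FMOVs are raw bit moves:
// "fmov w0, s0" copies the IEEE-754 bit pattern unchanged (bitconvert),
// in contrast to the value-converting FCVT* instructions earlier on.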
def lane1_asmoperand : AsmOperandClass {
let Name = "Lane1";
let RenderMethod = "addImmOperands";
let DiagnosticType = "Lane1";
}
def lane1 : Operand<i32> {
let ParserMatchClass = lane1_asmoperand;
let PrintMethod = "printBareImmOperand";
}
let DecoderMethod = "DecodeFMOVLaneInstruction" in {
def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
(outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
"fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
(outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
"fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
}
let Predicates = [HasFPARMv8] in {
def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
(FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
(FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
}
//===----------------------------------------------------------------------===//
// Floating-point immediate instructions
//===----------------------------------------------------------------------===//
// Contains: FMOV
def fpimm_asmoperand : AsmOperandClass {
let Name = "FMOVImm";
let ParserMethod = "ParseFPImmOperand";
let DiagnosticType = "FPImm";
}
// The MCOperand for these instructions is the encoded 8-bit value.
def SDXF_fpimm : SDNodeXForm<fpimm, [{
uint32_t Imm8;
A64Imms::isFPImm(N->getValueAPF(), Imm8);
return CurDAG->getTargetConstant(Imm8, MVT::i32);
}]>;
class fmov_operand<ValueType FT>
: Operand<i32>,
PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
SDXF_fpimm> {
let PrintMethod = "printFPImmOperand";
let ParserMatchClass = fpimm_asmoperand;
}
def fmov32_operand : fmov_operand<f32>;
def fmov64_operand : fmov_operand<f64>;
class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
Operand fmov_operand>
: A64I_fpimm<0b0, 0b0, type, 0b00000,
(outs Reg:$Rd),
(ins fmov_operand:$Imm8),
"fmov\t$Rd, $Imm8",
[(set VT:$Rd, fmov_operand:$Imm8)],
NoItinerary>;
def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
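// Background sketch (general AArch64 knowledge, not derived from this file):
// the 8-bit immediate covers values of the form +/-(16+m)/16 * 2^e for
// m in [0,15] and a small exponent range, so "fmov s0, #1.0" and
// "fmov d0, #0.5" encode while something like #0.1 fails isFPImm.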
//===----------------------------------------------------------------------===//
// Load-register (literal) instructions
//===----------------------------------------------------------------------===//
// Contains: LDR, LDRSW, PRFM
def ldrlit_label_asmoperand : AsmOperandClass {
let Name = "LoadLitLabel";
let RenderMethod = "addLabelOperands<19, 4>";
let DiagnosticType = "Label";
}
def ldrlit_label : Operand<i64> {
let EncoderMethod = "getLoadLitLabelOpValue";
// This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
let PrintMethod = "printLabelOperand<19, 4>";
let ParserMatchClass = ldrlit_label_asmoperand;
let OperandType = "OPERAND_PCREL";
}
// Various instructions take an immediate value (which can always be used),
// where some numbers have a symbolic name to make things easier. These operands
// and the associated functions abstract away the differences.
multiclass namedimm<string prefix, string mapper> {
def _asmoperand : AsmOperandClass {
let Name = "NamedImm" # prefix;
let PredicateMethod = "isUImm";
let RenderMethod = "addImmOperands";
let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
let DiagnosticType = "NamedImm_" # prefix;
}
def _op : Operand<i32> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
let PrintMethod = "printNamedImmOperand<" # mapper # ">";
let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
}
}
defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
list<dag> patterns = []>
: A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
"ldr\t$Rt, $Imm19", patterns, NoItinerary>;
let mayLoad = 1 in {
def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
}
let Predicates = [HasFPARMv8] in {
def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
}
let mayLoad = 1 in {
let Predicates = [HasFPARMv8] in {
def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
}
def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
(outs GPR64:$Rt),
(ins ldrlit_label:$Imm19),
"ldrsw\t$Rt, $Imm19",
[], NoItinerary>;
def PRFM_lit : A64I_LDRlit<0b11, 0b0,
(outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
"prfm\t$Rt, $Imm19",
[], NoItinerary>;
}
//===----------------------------------------------------------------------===//
// Load-store exclusive instructions
//===----------------------------------------------------------------------===//
// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
// STLRH, STLR, LDARB, LDARH, LDAR
// Since these instructions have the undefined register bits set to 1 in
// their canonical form, we need a post encoder method to set those bits
// to 1 when encoding these instructions. We do this using the
// fixLoadStoreExclusive function. This function has template parameters:
//
// fixLoadStoreExclusive<int hasRs, int hasRt2>
//
// hasRs indicates that the instruction uses the Rs field, so we won't set
// it to 1 (and the same for Rt2). We don't need template parameters for
// the other register fields since Rt and Rn are always used.
// This operand parses a GPR64xsp register, followed by an optional immediate
// #0.
def GPR64xsp0_asmoperand : AsmOperandClass {
let Name = "GPR64xsp0";
let PredicateMethod = "isWrappedReg";
let RenderMethod = "addRegOperands";
let ParserMethod = "ParseLSXAddressOperand";
// Diagnostics are provided by ParserMethod
}
def GPR64xsp0 : RegisterOperand<GPR64xsp> {
let ParserMatchClass = GPR64xsp0_asmoperand;
}
//===----------------------------------
// Store-exclusive (releasing & normal)
//===----------------------------------
class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
dag ins, list<dag> pat,
InstrItinClass itin> :
A64I_LDSTex_stn <size,
opcode{2}, 0, opcode{1}, opcode{0},
outs, ins,
!strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
pat, itin> {
let mayStore = 1;
let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+ let Constraints = "@earlyclobber $Rs";
}
multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
(outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
[], NoItinerary>;
def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
(outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
[], NoItinerary>;
def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
(outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
[], NoItinerary>;
def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
(outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
[], NoItinerary>;
}
defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
//===----------------------------------
// Loads
//===----------------------------------
class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
dag ins, list<dag> pat,
InstrItinClass itin> :
A64I_LDSTex_tn <size,
opcode{2}, 1, opcode{1}, opcode{0},
outs, ins,
!strconcat(asm, "\t$Rt, [$Rn]"),
pat, itin> {
let mayLoad = 1;
let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
}
multiclass A64I_LRex<string asmstr, bits<3> opcode> {
def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
(outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
[], NoItinerary>;
def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
(outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
[], NoItinerary>;
def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
(outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
[], NoItinerary>;
def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
(outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
[], NoItinerary>;
}
defm LDXR : A64I_LRex<"ldxr", 0b000>;
defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
defm LDAR : A64I_LRex<"ldar", 0b101>;
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
return Ordering == Acquire || Ordering == SequentiallyConsistent;
}]>;
def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
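// Example of what these fragments buy us (restating the Pats above): an
// atomic i32 load with acquire or seq_cst ordering selects LDAR_word, i.e.
// "ldar w0, [x1]"; weaker orderings fail the predicate and are left for
// the ordinary load patterns.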
//===----------------------------------
// Store-release (no exclusivity)
//===----------------------------------
class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
dag ins, list<dag> pat,
InstrItinClass itin> :
A64I_LDSTex_tn <size,
opcode{2}, 0, opcode{1}, opcode{0},
outs, ins,
!strconcat(asm, "\t$Rt, [$Rn]"),
pat, itin> {
let mayStore = 1;
let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
}
class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
return Ordering == Release || Ordering == SequentiallyConsistent;
}]>;
def atomic_store_release_8 : releasing_store<atomic_store_8>;
def atomic_store_release_16 : releasing_store<atomic_store_16>;
def atomic_store_release_32 : releasing_store<atomic_store_32>;
def atomic_store_release_64 : releasing_store<atomic_store_64>;
multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
(outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
[(atomic_store_release_8 i64:$Rn, i32:$Rt)],
NoItinerary>;
def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
(outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
[(atomic_store_release_16 i64:$Rn, i32:$Rt)],
NoItinerary>;
def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
(outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
[(atomic_store_release_32 i64:$Rn, i32:$Rt)],
NoItinerary>;
def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
(outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
[(atomic_store_release_64 i64:$Rn, i64:$Rt)],
NoItinerary>;
}
defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
//===----------------------------------
// Store-exclusive pair (releasing & normal)
//===----------------------------------
class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
dag ins, list<dag> pat,
InstrItinClass itin> :
A64I_LDSTex_stt2n <size,
opcode{2}, 0, opcode{1}, opcode{0},
outs, ins,
!strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
pat, itin> {
let mayStore = 1;
}
multiclass A64I_SPex<string asmstr, bits<3> opcode> {
def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
(ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
GPR64xsp0:$Rn),
[], NoItinerary>;
def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
(ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
GPR64xsp0:$Rn),
[], NoItinerary>;
}
defm STXP : A64I_SPex<"stxp", 0b010>;
defm STLXP : A64I_SPex<"stlxp", 0b011>;
//===----------------------------------
// Load-exclusive pair (acquiring & normal)
//===----------------------------------
class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
dag ins, list<dag> pat,
InstrItinClass itin> :
A64I_LDSTex_tt2n <size,
opcode{2}, 1, opcode{1}, opcode{0},
outs, ins,
!strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
pat, itin> {
let mayLoad = 1;
let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
}
multiclass A64I_LPex<string asmstr, bits<3> opcode> {
def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
(outs GPR32:$Rt, GPR32:$Rt2),
(ins GPR64xsp0:$Rn),
[], NoItinerary>;
def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
(outs GPR64:$Rt, GPR64:$Rt2),
(ins GPR64xsp0:$Rn),
[], NoItinerary>;
}
defm LDXP : A64I_LPex<"ldxp", 0b010>;
defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
//===----------------------------------------------------------------------===//
// Load-store register (unscaled immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store register (register offset) instructions
//===----------------------------------------------------------------------===//
// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store register (unsigned immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store register (immediate post-indexed) instructions
//===----------------------------------------------------------------------===//
// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store register (immediate pre-indexed) instructions
//===----------------------------------------------------------------------===//
// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
// Note that patterns are much later on in a completely separate section (they
// need ADRPxi to be defined).
//===-------------------------------
// 1. Various operands needed
//===-------------------------------
//===-------------------------------
// 1.1 Unsigned 12-bit immediate operands
//===-------------------------------
// The addressing mode for these instructions consists of an unsigned 12-bit
// immediate which is scaled by the size of the memory access.
//
// We represent this in the MC layer by two operands:
// 1. A base register.
// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
// would have '1' in this field.
// This means that separate functions are needed for converting representations
// which *are* aware of the intended access size.
// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
// know the access size via some means. An isolated operand does not have this
// information unless told from here, which means we need separate tablegen
// Operands for each access size. This multiclass takes care of instantiating
// the correct template functions in the rest of the backend.
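// Concrete instance of the scheme above: with a 4-byte access,
// "ldr w0, [x1, #8]" carries 2 in the MC-level immediate, and the largest
// expressible byte offset is 4095 * MemSize.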
multiclass offsets_uimm12<int MemSize, string prefix> {
def uimm12_asmoperand : AsmOperandClass {
let Name = "OffsetUImm12_" # MemSize;
let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
let DiagnosticType = "LoadStoreUImm12_" # MemSize;
}
// Pattern is really no more than an ImmLeaf, but predicated on MemSize which
// complicates things beyond TableGen's ken.
def uimm12 : Operand<i64>,
ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
let ParserMatchClass
= !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
}
}
defm byte_ : offsets_uimm12<1, "byte_">;
defm hword_ : offsets_uimm12<2, "hword_">;
defm word_ : offsets_uimm12<4, "word_">;
defm dword_ : offsets_uimm12<8, "dword_">;
defm qword_ : offsets_uimm12<16, "qword_">;
//===-------------------------------
// 1.2 Signed 9-bit immediate operands
//===-------------------------------
// The MCInst is expected to store the bit-wise encoding of the value,
// which amounts to lopping off the extended sign bits.
def SDXF_simm9 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
}]>;
def simm9_asmoperand : AsmOperandClass {
let Name = "SImm9";
let PredicateMethod = "isSImm<9>";
let RenderMethod = "addSImmOperands<9>";
let DiagnosticType = "LoadStoreSImm9";
}
def simm9 : Operand<i64>,
ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
SDXF_simm9> {
let PrintMethod = "printOffsetSImm9Operand";
let ParserMatchClass = simm9_asmoperand;
}
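// Encoding sketch following SDXF_simm9 above: the value is truncated to its
// low 9 bits, so #-1 encodes as 0x1ff and #-256 as 0x100; the ImmLeaf bounds
// [-0x100, 0xff] ensure the truncation is lossless.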
//===-------------------------------
// 1.3 Register offset extensions
//===-------------------------------
// The assembly-syntax for these addressing-modes is:
// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
//
// The essential semantics are:
// + <amount> is a shift: #<log(transfer size)> or #0
// + <R> can be W or X.
// + If <R> is W, <extend> can be UXTW or SXTW
// + If <R> is X, <extend> can be LSL or SXTX
//
// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
// which will need separate instructions for LLVM type-consistency. We'll also
// need separate operands, of course.
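// Two concrete forms the rules above permit (illustrative, assuming an
// 8-byte transfer so <amount> is #3 or #0):
//   ldr x0, [x1, w2, sxtw #3]   ; W-register index, sign-extended
//   ldr x0, [x1, x2, lsl #3]    ; X-register index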
multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
string Rm, string prefix> {
def regext_asmoperand : AsmOperandClass {
let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
}
def regext : Operand<i64> {
let PrintMethod
= "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
let DecoderMethod = "DecodeAddrRegExtendOperand";
let ParserMatchClass
= !cast<AsmOperandClass>(prefix # regext_asmoperand);
}
}
multiclass regexts_wx<int MemSize, string prefix> {
// Rm is an X-register if LSL or SXTX are specified as the shift.
defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
// Rm is a W-register if UXTW or SXTW are specified as the shift.
defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
}
defm byte_ : regexts_wx<1, "byte_">;
defm hword_ : regexts_wx<2, "hword_">;
defm word_ : regexts_wx<4, "word_">;
defm dword_ : regexts_wx<8, "dword_">;
defm qword_ : regexts_wx<16, "qword_">;
//===------------------------------
// 2. The instructions themselves.
//===------------------------------
// We have the following instructions to implement:
// | | B | H | W | X |
// |-----------------+-------+-------+-------+--------|
// | unsigned str | STRB | STRH | STR | STR |
// | unsigned ldr | LDRB | LDRH | LDR | LDR |
// | signed ldr to W | LDRSB | LDRSH | - | - |
// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
// This will instantiate the LDR/STR instructions you'd expect to use for an
// unsigned datatype (first two rows above) or floating-point register, which is
// reasonably uniform across all access sizes.
//===------------------------------
// 2.1 Regular instructions
//===------------------------------
// This class covers the basic unsigned or irrelevantly-signed loads and stores,
// to general-purpose and floating-point registers.
class AddrParams<string prefix> {
Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
}
def byte_addrparams : AddrParams<"byte">;
def hword_addrparams : AddrParams<"hword">;
def word_addrparams : AddrParams<"word">;
def dword_addrparams : AddrParams<"dword">;
def qword_addrparams : AddrParams<"qword">;
multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
bit high_opc, string asmsuffix,
RegisterClass GPR, AddrParams params> {
// Unsigned immediate
def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
(outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
"str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
[], NoItinerary> {
let mayStore = 1;
}
def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
(outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
"ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
// Register offset (four of these: load/store and Wm/Xm).
let mayLoad = 1 in {
def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
(outs GPR:$Rt),
(ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
"ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
(outs GPR:$Rt),
(ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
"ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
}
def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
(!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
GPR64:$Rm, 2)>;
let mayStore = 1 in {
def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
(outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
params.regextWm:$Ext),
"str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
(outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
params.regextXm:$Ext),
"str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
}
def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
(!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
GPR64:$Rm, 2)>;
// Unaligned immediate
def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
(outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
"stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayStore = 1;
}
def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
(outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
"ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
// Post-indexed
def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
(outs GPR64xsp:$Rn_wb),
(ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
"str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
[], NoItinerary> {
let Constraints = "$Rn = $Rn_wb";
let mayStore = 1;
// Decoder only needed for unpredictability checking (FIXME).
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
(outs GPR:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
// Pre-indexed
def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
(outs GPR64xsp:$Rn_wb),
(ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
"str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
[], NoItinerary> {
let Constraints = "$Rn = $Rn_wb";
let mayStore = 1;
// Decoder only needed for unpredictability checking (FIXME).
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
(outs GPR:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
}
// STRB/LDRB: First define the instructions
defm LS8
: A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
// STRH/LDRH
defm LS16
: A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
// STR/LDR to/from a W register
defm LS32
: A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
// STR/LDR to/from an X register
defm LS64
: A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
let Predicates = [HasFPARMv8] in {
// STR/LDR to/from a B register
defm LSFP8
: A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
// STR/LDR to/from an H register
defm LSFP16
: A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
// STR/LDR to/from an S register
defm LSFP32
: A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
// STR/LDR to/from a D register
defm LSFP64
: A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
// STR/LDR to/from a Q register
defm LSFP128
: A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
qword_addrparams>;
}
//===------------------------------
// 2.3 Signed loads
//===------------------------------
// Byte and half-word signed loads can both go into either an X or a W register,
// so it's worth factoring out. Signed word loads don't fit because there is no
// W version.
multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
string prefix> {
// Unsigned offset
def w : A64I_LSunsigimm<size, 0b0, 0b11,
(outs GPR32:$Rt),
(ins GPR64xsp:$Rn, params.uimm12:$UImm12),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
def x : A64I_LSunsigimm<size, 0b0, 0b10,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, params.uimm12:$UImm12),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
// Register offset
let mayLoad = 1 in {
def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
(outs GPR32:$Rt),
(ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
(outs GPR32:$Rt),
(ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
}
def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
(!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
GPR64:$Rm, 2)>;
def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
(!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
GPR64:$Rm, 2)>;
let mayLoad = 1 in {
// Unaligned offset
def w_U : A64I_LSunalimm<size, 0b0, 0b11,
(outs GPR32:$Rt),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary>;
def x_U : A64I_LSunalimm<size, 0b0, 0b10,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary>;
// Post-indexed
def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
(outs GPR32:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
[], NoItinerary> {
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
(outs GPR64:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
[], NoItinerary> {
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
// Pre-indexed
def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
(outs GPR32:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
[], NoItinerary> {
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
(outs GPR64:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
[], NoItinerary> {
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
} // let mayLoad = 1
}
// LDRSB
defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
// LDRSH
defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
// LDRSW: load a 32-bit register, sign-extending to 64-bits.
def LDRSWx
: A64I_LSunsigimm<0b10, 0b0, 0b10,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, word_uimm12:$UImm12),
"ldrsw\t$Rt, [$Rn, $UImm12]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
let mayLoad = 1 in {
def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
"ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
"ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
}
def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
(LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
def LDURSWx
: A64I_LSunalimm<0b10, 0b0, 0b10,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldursw\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
def LDRSWx_PostInd
: A64I_LSpostind<0b10, 0b0, 0b10,
(outs GPR64:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldrsw\t$Rt, [$Rn], $SImm9",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
(outs GPR64:$Rt, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldrsw\t$Rt, [$Rn, $SImm9]!",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeSingleIndexedInstruction";
}
//===------------------------------
// 2.4 Prefetch operations
//===------------------------------
def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
(ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
"prfm\t$Rt, [$Rn, $UImm12]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"prfm $Rt, [$Rn]",
(PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
let mayLoad = 1 in {
def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
(ins prefetch_op:$Rt, GPR64xsp:$Rn,
GPR32:$Rm, dword_Wm_regext:$Ext),
"prfm\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
(ins prefetch_op:$Rt, GPR64xsp:$Rn,
GPR64:$Rm, dword_Xm_regext:$Ext),
"prfm\t$Rt, [$Rn, $Rm, $Ext]",
[], NoItinerary>;
}
def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
(PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
GPR64:$Rm, 2)>;
def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
(ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
"prfum\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"prfum $Rt, [$Rn]",
(PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
//===----------------------------------------------------------------------===//
// Load-store register (unprivileged) instructions
//===----------------------------------------------------------------------===//
// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
// These instructions very much mirror the "unscaled immediate" loads, but since
// there are no floating-point variants we need to split them out into their own
// section to avoid instantiation of "ldtr d0, [sp]" etc.
multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
string prefix> {
def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
(outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
"sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayStore = 1;
}
def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
(outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
"ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
}
// STTRB/LDTRB: First define the instructions
defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
// STTRH/LDTRH
defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
// STTR/LDTR to/from a W register
defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
// STTR/LDTR to/from an X register
defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
// Now a class for the signed instructions that can go to either 32 or 64
// bits...
multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
let mayLoad = 1 in {
def w : A64I_LSunpriv<size, 0b0, 0b11,
(outs GPR32:$Rt),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary>;
def x : A64I_LSunpriv<size, 0b0, 0b10,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
[], NoItinerary>;
}
def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
(!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
}
// LDTRSB
defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
// LDTRSH
defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
// And finally LDTRSW which only goes to 64 bits.
def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
(outs GPR64:$Rt),
(ins GPR64xsp:$Rn, simm9:$SImm9),
"ldtrsw\t$Rt, [$Rn, $SImm9]",
[], NoItinerary> {
let mayLoad = 1;
}
def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
//===----------------------------------------------------------------------===//
// Load-store register pair (offset) instructions
//===----------------------------------------------------------------------===//
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store register pair (post-indexed) instructions
//===----------------------------------------------------------------------===//
// Contains: STP, LDP, LDPSW
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store register pair (pre-indexed) instructions
//===----------------------------------------------------------------------===//
// Contains: STP, LDP, LDPSW
//
// and
//
//===----------------------------------------------------------------------===//
// Load-store non-temporal register pair (offset) instructions
//===----------------------------------------------------------------------===//
// Contains: STNP, LDNP
// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
// know the access size via some means. An isolated operand does not have this
// information unless told from here, which means we need separate tablegen
// Operands for each access size. This multiclass takes care of instantiating
// the correct template functions in the rest of the backend.
multiclass offsets_simm7<string MemSize, string prefix> {
// The bare signed 7-bit immediate is used in post-indexed instructions, but
// because of the scaling performed a generic "simm7" operand isn't
// appropriate here either.
def simm7_asmoperand : AsmOperandClass {
let Name = "SImm7_Scaled" # MemSize;
let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
let DiagnosticType = "LoadStoreSImm7_" # MemSize;
}
def simm7 : Operand<i64> {
let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
}
}
defm word_ : offsets_simm7<"4", "word_">;
defm dword_ : offsets_simm7<"8", "dword_">;
defm qword_ : offsets_simm7<"16", "qword_">;
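// Worked range (following the scaling just described): the dword variant's
// 7-bit signed field spans -64..63 units of 8 bytes, i.e. byte offsets
// -512..504 in steps of 8.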
multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
Operand simm7, string prefix> {
def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
(ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
"stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
let mayStore = 1;
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
(!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
def _LDR : A64I_LSPoffset<opc, v, 0b1,
(outs SomeReg:$Rt, SomeReg:$Rt2),
(ins GPR64xsp:$Rn, simm7:$SImm7),
"ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
let mayLoad = 1;
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
(!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
(outs GPR64xsp:$Rn_wb),
(ins SomeReg:$Rt, SomeReg:$Rt2,
GPR64xsp:$Rn,
simm7:$SImm7),
"stp\t$Rt, $Rt2, [$Rn], $SImm7",
[], NoItinerary> {
let mayStore = 1;
let Constraints = "$Rn = $Rn_wb";
// Decoder only needed for unpredictability checking (FIXME).
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
(outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm7:$SImm7),
"ldp\t$Rt, $Rt2, [$Rn], $SImm7",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
(ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
"stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
[], NoItinerary> {
let mayStore = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
(outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
(ins GPR64xsp:$Rn, simm7:$SImm7),
"ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
(ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
"stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
let mayStore = 1;
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
(!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
(outs SomeReg:$Rt, SomeReg:$Rt2),
(ins GPR64xsp:$Rn, simm7:$SImm7),
"ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
let mayLoad = 1;
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
(!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
}
defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
let Predicates = [HasFPARMv8] in {
defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
"LSFPPair128">;
}
def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
(outs GPR64:$Rt, GPR64:$Rt2),
(ins GPR64xsp:$Rn, word_simm7:$SImm7),
"ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
let mayLoad = 1;
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
(LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
(outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
(ins GPR64xsp:$Rn, word_simm7:$SImm7),
"ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeLDSTPairInstruction";
}
def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
(outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
(ins GPR64xsp:$Rn, word_simm7:$SImm7),
"ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
[], NoItinerary> {
let mayLoad = 1;
let Constraints = "$Rn = $Rn_wb";
let DecoderMethod = "DecodeLDSTPairInstruction";
}
//===----------------------------------------------------------------------===//
// Logical (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
multiclass logical_imm_operands<string prefix, string note,
int size, ValueType VT> {
def _asmoperand : AsmOperandClass {
let Name = "LogicalImm" # note # size;
let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
let RenderMethod = "addLogicalImmOperands<" # size # ">";
let DiagnosticType = "LogicalSecondSource";
}
def _operand
: Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
let PrintMethod = "printLogicalImmOperand<" # size # ">";
let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
}
}
defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
// The mov versions only differ in assembly parsing, where they
// exclude values representable with either MOVZ or MOVN.
defm logical_imm32_mov
: logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
defm logical_imm64_mov
: logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
(ins GPR32:$Rn, logical_imm32_operand:$Imm),
!strconcat(asmop, "\t$Rd, $Rn, $Imm"),
[(set i32:$Rd,
(opnode i32:$Rn, logical_imm32_operand:$Imm))],
NoItinerary>;
def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
(ins GPR64:$Rn, logical_imm64_operand:$Imm),
!strconcat(asmop, "\t$Rd, $Rn, $Imm"),
[(set i64:$Rd,
(opnode i64:$Rn, logical_imm64_operand:$Imm))],
NoItinerary>;
}
defm AND : A64I_logimmSizes<0b00, "and", and>;
defm ORR : A64I_logimmSizes<0b01, "orr", or>;
defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
let Defs = [NZCV] in {
def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
(ins GPR32:$Rn, logical_imm32_operand:$Imm),
"ands\t$Rd, $Rn, $Imm",
[], NoItinerary>;
def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
(ins GPR64:$Rn, logical_imm64_operand:$Imm),
"ands\t$Rd, $Rn, $Imm",
[], NoItinerary>;
}
def : InstAlias<"tst $Rn, $Imm",
(ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
def : InstAlias<"tst $Rn, $Imm",
(ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
def : InstAlias<"mov $Rd, $Imm",
(ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
def : InstAlias<"mov $Rd, $Imm",
(ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
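// So, for example, "tst w1, #0xff" is parsed as "ands wzr, w1, #0xff", and
// "mov x0, #0xff00ff00ff00ff00" as "orr x0, xzr, #0xff00ff00ff00ff00".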
//===----------------------------------------------------------------------===//
// Logical (shifted register) instructions
//===----------------------------------------------------------------------===//
// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
// behaves differently for unsigned comparisons, so we defensively only allow
// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
// equal to 0" and LLVM gives us this.
def signed_cond : PatLeaf<(cond), [{
return !isUnsignedIntSetCC(N->get());
}]>;
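// (Concretely: DAG canonicalisation rewrites (setcc X, 0, SETUGT) to
// (setcc X, 0, SETNE) before we ever see it, so rejecting unsigned condition
// codes here costs nothing in practice.)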
// These instructions share their "shift" operands with add/sub (shifted
// register instructions). They are defined there.
// N.b. the commutable parameter is just !N. It will be first against the wall
// when the revolution comes.
multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
bit N, bit commutable,
string asmop, SDPatternOperator opfrag, ValueType ty,
RegisterClass GPR, list<Register> defs> {
let isCommutable = commutable, Defs = defs in {
def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _asr : A64I_logicalshift<sf, opc, 0b10, N,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6))
)],
NoItinerary>;
def _ror : A64I_logicalshift<sf, opc, 0b11, N,
(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("ror_operand_" # ty):$Imm6),
!strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
[(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
!cast<Operand>("ror_operand_" # ty):$Imm6))
)],
NoItinerary>;
}
def _noshift
: InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
(!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
GPR:$Rm, 0)>;
def : Pat<(opfrag ty:$Rn, ty:$Rm),
(!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
}
multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
string asmop, SDPatternOperator opfrag,
list<Register> defs> {
defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
commutable, asmop, opfrag, i64, GPR64, defs>;
defm www : logical_shifts<prefix # "www", 0b0, opc, N,
commutable, asmop, opfrag, i32, GPR32, defs>;
}
defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
[{ (void)N; return false; }]>,
[NZCV]>;
defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
PatFrag<(ops node:$lhs, node:$rhs),
(and node:$lhs, (not node:$rhs))>, []>;
defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
PatFrag<(ops node:$lhs, node:$rhs),
(or node:$lhs, (not node:$rhs))>, []>;
defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
PatFrag<(ops node:$lhs, node:$rhs),
(xor node:$lhs, (not node:$rhs))>, []>;
defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
PatFrag<(ops node:$lhs, node:$rhs),
(and node:$lhs, (not node:$rhs)),
[{ (void)N; return false; }]>,
[NZCV]>;
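// Each defm above expands to _lsl/_lsr/_asr/_ror variants for both widths,
// plus a no-shift alias; e.g. "bic x0, x1, x2" is just "bic x0, x1, x2,
// lsl #0", computing x1 & ~x2.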
multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6),
"tst\t$Rn, $Rm, $Imm6",
[(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6)),
0, signed_cond))],
NoItinerary>;
def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6),
"tst\t$Rn, $Rm, $Imm6",
[(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6)),
0, signed_cond))],
NoItinerary>;
def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6),
"tst\t$Rn, $Rm, $Imm6",
[(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6)),
0, signed_cond))],
NoItinerary>;
def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
(outs),
(ins GPR:$Rn, GPR:$Rm,
!cast<Operand>("ror_operand_" # ty):$Imm6),
"tst\t$Rn, $Rm, $Imm6",
[(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
!cast<Operand>("ror_operand_" # ty):$Imm6)),
0, signed_cond))],
NoItinerary>;
}
def _noshift : InstAlias<"tst $Rn, $Rm",
(!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
(!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
}
defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
let isCommutable = 0, Rn = 0b11111 in {
def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
(outs GPR:$Rd),
(ins GPR:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6),
"mvn\t$Rd, $Rm, $Imm6",
[(set ty:$Rd, (not (shl ty:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6)))],
NoItinerary>;
def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
(outs GPR:$Rd),
(ins GPR:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6),
"mvn\t$Rd, $Rm, $Imm6",
[(set ty:$Rd, (not (srl ty:$Rm,
!cast<Operand>("lsr_operand_" # ty):$Imm6)))],
NoItinerary>;
def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
(outs GPR:$Rd),
(ins GPR:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6),
"mvn\t$Rd, $Rm, $Imm6",
[(set ty:$Rd, (not (sra ty:$Rm,
!cast<Operand>("asr_operand_" # ty):$Imm6)))],
NoItinerary>;
def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
(outs GPR:$Rd),
(ins GPR:$Rm,
!cast<Operand>("ror_operand_" # ty):$Imm6),
"mvn\t$Rd, $Rm, $Imm6",
[(set ty:$Rd, (not (rotr ty:$Rm,
!cast<Operand>("lsl_operand_" # ty):$Imm6)))],
NoItinerary>;
}
def _noshift : InstAlias<"mvn $Rn, $Rm",
(!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
def : Pat<(not ty:$Rm),
(!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
}
defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
//===----------------------------------------------------------------------===//
// Move wide (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: MOVN, MOVZ, MOVK + MOV aliases
// A wide variety of different relocations are needed for variants of these
// instructions, so it turns out that we need a different operand for all of
// them.
multiclass movw_operands<string prefix, string instname, int width> {
def _imm_asmoperand : AsmOperandClass {
let Name = instname # width # "Imm";
let PredicateMethod = "is" # instname # width # "Imm";
let RenderMethod = "addMoveWideImmOperands";
let ParserMethod = "ParseImmWithLSLOperand";
let DiagnosticType = "MOVWUImm16";
}
def _imm : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
let PrintMethod = "printMoveWideImmOperand";
let EncoderMethod = "getMoveWideImmOpValue";
let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
}
}
defm movn32 : movw_operands<"movn32", "MOVN", 32>;
defm movn64 : movw_operands<"movn64", "MOVN", 64>;
defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
defm movk32 : movw_operands<"movk32", "MOVK", 32>;
defm movk64 : movw_operands<"movk64", "MOVK", 64>;
multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
dag ins64bit> {
def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
!strconcat(asmop, "\t$Rd, $FullImm"),
[], NoItinerary> {
bits<18> FullImm;
let UImm16 = FullImm{15-0};
let Shift = FullImm{17-16};
}
def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
!strconcat(asmop, "\t$Rd, $FullImm"),
[], NoItinerary> {
bits<18> FullImm;
let UImm16 = FullImm{15-0};
let Shift = FullImm{17-16};
}
}
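// Note: the 18-bit $FullImm operand bundles the 16-bit immediate with a 2-bit
// shift count in units of 16 bits; e.g. "movz x0, #1, lsl #16" is encoded
// with UImm16 = 1 and Shift = 0b01.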
let isMoveImm = 1, isReMaterializable = 1,
isAsCheapAsAMove = 1, hasSideEffects = 0 in {
defm MOVN : A64I_movwSizes<0b00, "movn",
(ins movn32_imm:$FullImm),
(ins movn64_imm:$FullImm)>;
// Some relocations are able to convert between a MOVZ and a MOVN. If these
// are applied the instruction must be emitted with the corresponding bits as
// 0, which means a MOVZ needs to override that bit from the default.
let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : A64I_movwSizes<0b10, "movz",
(ins movz32_imm:$FullImm),
(ins movz64_imm:$FullImm)>;
}
let Constraints = "$src = $Rd" in
defm MOVK : A64I_movwSizes<0b11, "movk",
(ins GPR32:$src, movk32_imm:$FullImm),
(ins GPR64:$src, movk64_imm:$FullImm)>;
// And now the "MOV" aliases. These also need their own operands because what
// they accept is completely different to what the base instructions accept.
multiclass movalias_operand<string prefix, string basename,
string immpredicate, int width> {
def _asmoperand : AsmOperandClass {
let Name = basename # width # "MovAlias";
let PredicateMethod
= "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
let RenderMethod
= "addMoveWideMovAliasOperands<" # width # ", "
# "A64Imms::" # immpredicate # ">";
}
def _movimm : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
}
}
defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
// FIXME: these are officially canonical aliases, but TableGen is too limited to
// print them at the moment. I believe in this case an "AliasPredicate" method
// will need to be implemented to allow it, as well as the more generally
// useful handling of non-register, non-constant operands.
class movalias<Instruction INST, RegisterClass GPR, Operand operand>
: InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
def : movalias<MOVZwii, GPR32, movz32_movimm>;
def : movalias<MOVZxii, GPR64, movz64_movimm>;
def : movalias<MOVNwii, GPR32, movn32_movimm>;
def : movalias<MOVNxii, GPR64, movn64_movimm>;
def movw_addressref_g0 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<0>">;
def movw_addressref_g1 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<1>">;
def movw_addressref_g2 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<2>">;
def movw_addressref_g3 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<3>">;
def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2,
movw_addressref_g1:$G1, movw_addressref_g0:$G0),
(MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3),
movw_addressref_g2:$G2),
movw_addressref_g1:$G1),
movw_addressref_g0:$G0)>;
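// For the large memory model this materialises a full 64-bit address as a
// MOVZ/MOVK chain, roughly:
//     movz x0, #:abs_g3:sym
//     movk x0, #:abs_g2_nc:sym
//     movk x0, #:abs_g1_nc:sym
//     movk x0, #:abs_g0_nc:sym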
//===----------------------------------------------------------------------===//
// PC-relative addressing instructions
//===----------------------------------------------------------------------===//
// Contains: ADR, ADRP
def adr_label : Operand<i64> {
let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
// This label is a 21-bit offset from PC, unscaled
let PrintMethod = "printLabelOperand<21, 1>";
let ParserMatchClass = label_asmoperand<21, 1>;
let OperandType = "OPERAND_PCREL";
}
def adrp_label_asmoperand : AsmOperandClass {
let Name = "AdrpLabel";
let RenderMethod = "addLabelOperands<21, 4096>";
let DiagnosticType = "Label";
}
def adrp_label : Operand<i64> {
let EncoderMethod = "getAdrpLabelOpValue";
// This label is a 21-bit offset from PC, scaled by the page-size: 4096.
let PrintMethod = "printLabelOperand<21, 4096>";
let ParserMatchClass = adrp_label_asmoperand;
let OperandType = "OPERAND_PCREL";
}
let hasSideEffects = 0 in {
def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
"adr\t$Rd, $Label", [], NoItinerary>;
def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
"adrp\t$Rd, $Label", [], NoItinerary>;
}
//===----------------------------------------------------------------------===//
// System instructions
//===----------------------------------------------------------------------===//
// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
def uimm3_asmoperand : AsmOperandClass {
let Name = "UImm3";
let PredicateMethod = "isUImm<3>";
let RenderMethod = "addImmOperands";
let DiagnosticType = "UImm3";
}
def uimm3 : Operand<i32> {
let ParserMatchClass = uimm3_asmoperand;
}
// The HINT alias can accept a simple unsigned 7-bit immediate.
def uimm7_asmoperand : AsmOperandClass {
let Name = "UImm7";
let PredicateMethod = "isUImm<7>";
let RenderMethod = "addImmOperands";
let DiagnosticType = "UImm7";
}
def uimm7 : Operand<i32> {
let ParserMatchClass = uimm7_asmoperand;
}
// Multiclass namedimm is defined with the prefetch operands. Most of these fit
// into the NamedImmMapper scheme well: they either accept a named operand or
// any immediate under a particular value (which may be 0, implying no immediate
// is allowed).
defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
defm ic : namedimm<"ic", "A64IC::ICMapper">;
defm dc : namedimm<"dc", "A64DC::DCMapper">;
defm at : namedimm<"at", "A64AT::ATMapper">;
defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
// However, MRS and MSR are more complicated for a few reasons:
// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
// implementation-defined effect
// * Most registers are shared, but some are read-only or write-only.
// * There is a variant of MSR which accepts the same register name (SPSel),
// but which would have a different encoding.
// In principle these could be resolved with more complicated subclasses of
// NamedImmMapper, but that would impose an overhead on the other "named
// immediates", both in concrete terms (virtual tables) and in unnecessary
// abstraction.
// The solution adopted here is to take the MRS/MSR Mappers out of the usual
// hierarchy (they're not derived from NamedImmMapper) and to add logic for
// their special situation.
def mrs_asmoperand : AsmOperandClass {
let Name = "MRS";
let ParserMethod = "ParseSysRegOperand";
let DiagnosticType = "MRS";
}
def mrs_op : Operand<i32> {
let ParserMatchClass = mrs_asmoperand;
let PrintMethod = "printMRSOperand";
let DecoderMethod = "DecodeMRSOperand";
}
def msr_asmoperand : AsmOperandClass {
let Name = "MSRWithReg";
// Note that SPSel is valid for both this and the pstate operands, but with
// different immediate encodings. This is why these operands provide a string
// AArch64Operand rather than an immediate. The overlap is small enough that
// it could be resolved with hackery now, but who can say in future?
let ParserMethod = "ParseSysRegOperand";
let DiagnosticType = "MSR";
}
def msr_op : Operand<i32> {
let ParserMatchClass = msr_asmoperand;
let PrintMethod = "printMSROperand";
let DecoderMethod = "DecodeMSROperand";
}
def pstate_asmoperand : AsmOperandClass {
let Name = "MSRPState";
// See comment above about parser.
let ParserMethod = "ParseSysRegOperand";
let DiagnosticType = "MSR";
}
def pstate_op : Operand<i32> {
let ParserMatchClass = pstate_asmoperand;
let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
}
// When <CRn> is specified, an assembler should accept something like "C4", not
// the usual "#4" immediate.
def CRx_asmoperand : AsmOperandClass {
let Name = "CRx";
let PredicateMethod = "isUImm<4>";
let RenderMethod = "addImmOperands";
let ParserMethod = "ParseCRxOperand";
// Diagnostics are handled in all cases by ParseCRxOperand.
}
def CRx : Operand<i32> {
let ParserMatchClass = CRx_asmoperand;
let PrintMethod = "printCRxOperand";
}
// Finally, we can start defining the instructions.
// HINT is straightforward, with a few aliases.
def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
[], NoItinerary> {
bits<7> UImm7;
let CRm = UImm7{6-3};
let Op2 = UImm7{2-0};
let Op0 = 0b00;
let Op1 = 0b011;
let CRn = 0b0010;
let Rt = 0b11111;
}
def : InstAlias<"nop", (HINTi 0)>;
def : InstAlias<"yield", (HINTi 1)>;
def : InstAlias<"wfe", (HINTi 2)>;
def : InstAlias<"wfi", (HINTi 3)>;
def : InstAlias<"sev", (HINTi 4)>;
def : InstAlias<"sevl", (HINTi 5)>;
// Quite a few instructions then follow a similar pattern of fixing common
// fields in the bitpattern, so we'll define a helper class for them.
class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
Operand operand, string asmop>
: A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
[], NoItinerary> {
let Op0 = op0;
let Op1 = op1;
let CRn = crn;
let Op2 = op2;
let Rt = 0b11111;
}
def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
def : InstAlias<"clrex", (CLREXi 0b1111)>;
def : InstAlias<"isb", (ISBi 0b1111)>;
// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP
// configurations at least.
def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
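// (CRm = 0xb is the "ish" barrier option: inner shareable domain, reads and
// writes, which is what a full fence needs on typical SMP systems.)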
// Any SYS bitpattern can be represented with a complex and opaque "SYS"
// instruction.
def SYSiccix : A64I_system<0b0, (outs),
(ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
uimm3:$Op2, GPR64:$Rt),
"sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
[], NoItinerary> {
let Op0 = 0b01;
}
// The Xt argument can be omitted from the generic SYS instruction, whether or
// not doing so makes sense for the particular operation.
def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
(SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
// But many have aliases, which obviously don't fit into that generic scheme.
class SYSalias<dag ins, string asmstring>
: A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
let isAsmParserOnly = 1;
bits<14> SysOp;
let Op0 = 0b01;
let Op1 = SysOp{13-11};
let CRn = SysOp{10-7};
let CRm = SysOp{6-3};
let Op2 = SysOp{2-0};
}
def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
let Rt = 0b11111;
}
def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
let Rt = 0b11111;
}
def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
(ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
"sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
[], NoItinerary> {
let Op0 = 0b01;
}
// The instructions themselves are rather simple for MSR and MRS.
def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
"msr\t$SysReg, $Rt", [], NoItinerary> {
bits<16> SysReg;
let Op0 = SysReg{15-14};
let Op1 = SysReg{13-11};
let CRn = SysReg{10-7};
let CRm = SysReg{6-3};
let Op2 = SysReg{2-0};
}
def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
"mrs\t$Rt, $SysReg", [], NoItinerary> {
bits<16> SysReg;
let Op0 = SysReg{15-14};
let Op1 = SysReg{13-11};
let CRn = SysReg{10-7};
let CRm = SysReg{6-3};
let Op2 = SysReg{2-0};
}
def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
"msr\t$PState, $CRm", [], NoItinerary> {
bits<6> PState;
let Op0 = 0b00;
let Op1 = PState{5-3};
let CRn = 0b0100;
let Op2 = PState{2-0};
let Rt = 0b11111;
}
//===----------------------------------------------------------------------===//
// Test & branch (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: TBZ, TBNZ
// The bit to test is a simple unsigned 6-bit immediate in the X-register
// versions.
def uimm6 : Operand<i64> {
let ParserMatchClass = uimm6_asmoperand;
}
def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
def tbimm_target : Operand<OtherVT> {
let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
// This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
let PrintMethod = "printLabelOperand<14, 4>";
let ParserMatchClass = label_wid14_scal4_asmoperand;
let OperandType = "OPERAND_PCREL";
}
def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
// These instructions correspond to patterns involving "and" with a power of
// two, which we need to be able to select.
def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
let isBranch = 1, isTerminator = 1 in {
def TBZxii : A64I_TBimm<0b0, (outs),
(ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
"tbz\t$Rt, $Imm, $Label",
[(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
A64eq, bb:$Label)],
NoItinerary>;
def TBNZxii : A64I_TBimm<0b1, (outs),
(ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
"tbnz\t$Rt, $Imm, $Label",
[(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
A64ne, bb:$Label)],
NoItinerary>;
// Note, these instructions overlap with the above 64-bit patterns. This is
// intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both
// do the same thing and are both permitted assembly. They also both have
// sensible DAG patterns.
def TBZwii : A64I_TBimm<0b0, (outs),
(ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
"tbz\t$Rt, $Imm, $Label",
[(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
A64eq, bb:$Label)],
NoItinerary> {
let Imm{5} = 0b0;
}
def TBNZwii : A64I_TBimm<0b1, (outs),
(ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
"tbnz\t$Rt, $Imm, $Label",
[(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
A64ne, bb:$Label)],
NoItinerary> {
let Imm{5} = 0b0;
}
}
//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions
//===----------------------------------------------------------------------===//
// Contains: B, BL
def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
def bimm_target : Operand<OtherVT> {
let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
// This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
let PrintMethod = "printLabelOperand<26, 4>";
let ParserMatchClass = label_wid26_scal4_asmoperand;
let OperandType = "OPERAND_PCREL";
}
def blimm_target : Operand<i64> {
let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
// This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
let PrintMethod = "printLabelOperand<26, 4>";
let ParserMatchClass = label_wid26_scal4_asmoperand;
let OperandType = "OPERAND_PCREL";
}
class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
: A64I_Bimm<op, (outs), (ins lbl_type:$Label),
!strconcat(asmop, "\t$Label"), patterns,
NoItinerary>;
let isBranch = 1 in {
def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
let isTerminator = 1;
let isBarrier = 1;
}
def BLimm : A64I_BimmImpl<0b1, "bl",
[(AArch64Call tglobaladdr:$Label)], blimm_target> {
let isCall = 1;
let Defs = [X30];
}
}
def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions
//===----------------------------------------------------------------------===//
// Contains: BR, BLR, RET, ERET, DRPS.
// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
// at the moment.
class A64I_BregImpl<bits<4> opc,
dag outs, dag ins, string asmstr, list<dag> patterns,
InstrItinClass itin = NoItinerary>
: A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
outs, ins, asmstr, patterns, itin> {
let isBranch = 1;
let isIndirectBranch = 1;
}
// Note that these are not marked isCall or isReturn because as far as LLVM is
// concerned they're not. "ret" is just another jump unless it has been selected
// by LLVM as the function's return.
let isBranch = 1 in {
def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
"br\t$Rn", [(brind i64:$Rn)]> {
let isBarrier = 1;
let isTerminator = 1;
}
def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
"blr\t$Rn", [(AArch64Call i64:$Rn)]> {
let isBarrier = 0;
let isCall = 1;
let Defs = [X30];
}
def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
"ret\t$Rn", []> {
let isBarrier = 1;
let isTerminator = 1;
let isReturn = 1;
}
// Create a separate pseudo-instruction for codegen to use so that we don't
// flag x30 as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
let isTerminator = 1;
let isBarrier = 1;
let isReturn = 1;
}
def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
let Rn = 0b11111;
let isBarrier = 1;
let isTerminator = 1;
let isReturn = 1;
}
def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
let Rn = 0b11111;
let isBarrier = 1;
}
}
def RETAlias : InstAlias<"ret", (RETx X30)>;
//===----------------------------------------------------------------------===//
// Address generation patterns
//===----------------------------------------------------------------------===//
// Primary method of address generation for the small/absolute memory model is
// an ADRP/ADD pair:
// ADRP x0, some_variable
// ADD x0, x0, #:lo12:some_variable
//
// The load/store elision of the ADD is accomplished when selecting
// addressing-modes. This just mops up the cases where that doesn't work and we
// really need an address in some register.
// This wrapper applies a LO12 modifier to the address. Otherwise we could just
// use the same address.
class ADRP_ADD<SDNode Wrapper, SDNode addrop>
: Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
(ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
def : ADRP_ADD<A64WrapperSmall, texternalsym>;
def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
def : ADRP_ADD<A64WrapperSmall, tjumptable>;
//===----------------------------------------------------------------------===//
// GOT access patterns
//===----------------------------------------------------------------------===//
class GOTLoadSmall<SDNode addrfrag>
: Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
(LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
def : GOTLoadSmall<texternalsym>;
def : GOTLoadSmall<tglobaladdr>;
def : GOTLoadSmall<tglobaltlsaddr>;
//===----------------------------------------------------------------------===//
// Tail call handling
//===----------------------------------------------------------------------===//
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
def TC_RETURNdi
: PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
[(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
def TC_RETURNxi
: PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
[(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
Uses = [XSP] in {
def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
(Bimm bimm_target:$Label)>;
def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
(BRx GPR64:$Rd)>;
}
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
//===----------------------------------------------------------------------===//
// Thread local storage
//===----------------------------------------------------------------------===//
def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
// This is a pseudo-instruction representing the ".tlsdesccall" directive in
// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
// current location. It should always be immediately followed by a BLR
// instruction, and is intended solely for relaxation by the linker.
def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
let hasSideEffects = 1;
}
def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
[(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
let isCall = 1;
let Defs = [X30];
}
def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
(TLSDESC_BLRx $Rn, texternalsym:$Var)>;
//===----------------------------------------------------------------------===//
// Bitfield patterns
//===----------------------------------------------------------------------===//
def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
}]>;
def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
}]>;
def bfi_width_to_imms : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
}]>;
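// Worked example of the transforms above: "bfi w0, w1, #8, #8" (insert an
// 8-bit field at bit 8) has lsb = 8 and width = 8, giving
// ImmR = (32 - 8) % 32 = 24 and ImmS = 8 - 1 = 7 in the underlying BFM
// encoding.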
// The simpler patterns deal with cases where no AND mask is actually needed
// (either all bits are used or the low 32 bits are used).
let AddedComplexity = 10 in {
def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
(BFIxxii $src, $Rn,
(bfi64_lsb_to_immr (i64 imm:$ImmR)),
(bfi_width_to_imms (i64 imm:$ImmS)))>;
def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
(BFIwwii $src, $Rn,
(bfi32_lsb_to_immr (i64 imm:$ImmR)),
(bfi_width_to_imms (i64 imm:$ImmS)))>;
def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
(i64 4294967295)),
(SUBREG_TO_REG (i64 0),
(BFIwwii (EXTRACT_SUBREG $src, sub_32),
(EXTRACT_SUBREG $Rn, sub_32),
(bfi32_lsb_to_immr (i64 imm:$ImmR)),
(bfi_width_to_imms (i64 imm:$ImmS))),
sub_32)>;
}
//===----------------------------------------------------------------------===//
// Miscellaneous patterns
//===----------------------------------------------------------------------===//
// Truncation from 64 to 32-bits just involves renaming your register.
def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
// Similarly, extension where we don't care about the high bits is
// just a rename.
def : Pat<(i64 (anyext i32:$val)),
(INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
// SELECT instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : PseudoInst<(outs FPR128:$Rd),
(ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
[(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
let Uses = [NZCV];
let usesCustomInserter = 1;
}
//===----------------------------------------------------------------------===//
// Load/store patterns
//===----------------------------------------------------------------------===//
// There are lots of patterns here, because we need to allow at least three
// parameters to vary independently.
// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
// 2. LLVM source: zextloadi8, anyextloadi8, ...
// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
//
// The biggest problem turns out to be the address-generation variable. At the
// point of instantiation we need to produce two DAGs, one for the pattern and
// one for the instruction. Doing this at the lowest level of classes doesn't
// work.
//
// Consider the simple uimm12 addressing mode, and the desire to match both (add
// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
// operation, and PatFrags are for selection not output.
//
// As a result, the address-generation patterns are the final
// instantiations. However, we do still need to vary the operand for the
// address further down (at the point we're deciding A64WrapperSmall, we don't
// know the memory width of the operation).
//===------------------------------
// 1. Basic infrastructural defs
//===------------------------------
// First, some simple classes for !foreach and !subst to use:
class Decls {
dag pattern;
}
def decls : Decls;
def ALIGN;
def INST;
def OFFSET;
def SHIFT;
// You can't use !subst on an actual immediate, but you *can* use it on an
// operand record that happens to match a single immediate. So we do.
def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
// If the low bits of a pointer are known to be 0 then an "or" is just as good
// as addition for computing an offset. This fragment forwards that check for
// TableGen's use.
def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
[{
return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
}]>;
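// For example, if %p is known to be 8-byte aligned then (or %p, 4) computes
// exactly %p + 4, so it can safely be selected as a base + offset address.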
// Load/store (unsigned immediate) operations with relocations against global
// symbols (for lo12) are only valid if those symbols have correct alignment
// (since the immediate offset is divided by the access scale, it can't have a
// remainder).
//
// The guaranteed alignment is provided as part of the WrapperSmall
// operation, and checked against one of these.
def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
// "Normal" load/store instructions can be used on atomic operations, provided
// the ordering parameter is at most "monotonic". Anything above that needs
// special handling with acquire/release instructions.
class simple_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
}]>;
def atomic_load_simple_i8 : simple_load<atomic_load_8>;
def atomic_load_simple_i16 : simple_load<atomic_load_16>;
def atomic_load_simple_i32 : simple_load<atomic_load_32>;
def atomic_load_simple_i64 : simple_load<atomic_load_64>;
class simple_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
}]>;
def atomic_store_simple_i8 : simple_store<atomic_store_8>;
def atomic_store_simple_i16 : simple_store<atomic_store_16>;
def atomic_store_simple_i32 : simple_store<atomic_store_32>;
def atomic_store_simple_i64 : simple_store<atomic_store_64>;
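// For example, an atomic_load_32 with monotonic (or weaker) ordering matches
// atomic_load_simple_i32 and selects to a plain 32-bit LDR; anything stronger
// is left for the dedicated LDAR/STLR acquire/release instructions.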
//===------------------------------
// 2. UImm12 and SImm9
//===------------------------------
// These instructions have two operands providing the address so they can be
// treated similarly for most purposes.
//===------------------------------
// 2.1 Base patterns covering extend/truncate semantics
//===------------------------------
// Atomic patterns can be shared between integer operations of all sizes, a
// quick multiclass here allows reuse.
multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
dag Offset, dag address, ValueType transty,
ValueType sty> {
def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
(LOAD Base, Offset)>;
def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
(STORE $Rt, Base, Offset)>;
}
// Instructions accessing a memory chunk smaller than a register (or, in a
// pinch, the same size) have a characteristic set of patterns they want to
// match: extending loads and truncating stores. This class deals with the
// sign-neutral version of those patterns.
//
// It will be instantiated across multiple addressing-modes.
multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
dag Base, dag Offset,
dag address, ValueType sty>
: ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
// For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
// register was actually set.
def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
(SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
(SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
(STORE $Rt, Base, Offset)>;
// For truncating store from 64-bits, we have to manually tell LLVM to
// ignore the high bits of the x register.
def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
(STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
}
// Next come patterns for sign-extending loads.
multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
dag address, ValueType sty> {
def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
(!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
(!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
}
// and finally "natural-width" loads and stores come next.
multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
dag Offset, dag address, ValueType sty> {
def : Pat<(sty (load address)), (LOAD Base, Offset)>;
def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
}
// Integer operations also get atomic instructions to select for.
multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
dag Offset, dag address, ValueType sty>
: ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
//===------------------------------
// 2.2. Addressing-mode instantiations
//===------------------------------
multiclass uimm12_pats<dag address, dag Base, dag Offset> {
defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, byte_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, byte_uimm12,
!subst(ALIGN, any_align, decls.pattern))),
i8>;
defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, hword_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, hword_uimm12,
!subst(ALIGN, min_align2, decls.pattern))),
i16>;
defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, word_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, word_uimm12,
!subst(ALIGN, min_align4, decls.pattern))),
i32>;
defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, word_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, word_uimm12,
!subst(ALIGN, min_align4, decls.pattern))),
i32>;
defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, dword_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, dword_uimm12,
!subst(ALIGN, min_align8, decls.pattern))),
i64>;
defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, hword_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, hword_uimm12,
!subst(ALIGN, min_align2, decls.pattern))),
f16>;
defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, word_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, word_uimm12,
!subst(ALIGN, min_align4, decls.pattern))),
f32>;
defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, dword_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, dword_uimm12,
!subst(ALIGN, min_align8, decls.pattern))),
f64>;
defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, qword_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, qword_uimm12,
!subst(ALIGN, min_align16, decls.pattern))),
f128>;
defm : load_signed_pats<"B", "", Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, byte_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, byte_uimm12,
!subst(ALIGN, any_align, decls.pattern))),
i8>;
defm : load_signed_pats<"H", "", Base,
!foreach(decls.pattern, Offset,
!subst(OFFSET, hword_uimm12, decls.pattern)),
!foreach(decls.pattern, address,
!subst(OFFSET, hword_uimm12,
!subst(ALIGN, min_align2, decls.pattern))),
i16>;
def : Pat<(sextloadi32 !foreach(decls.pattern, address,
!subst(OFFSET, word_uimm12,
!subst(ALIGN, min_align4, decls.pattern)))),
(LDRSWx Base, !foreach(decls.pattern, Offset,
!subst(OFFSET, word_uimm12, decls.pattern)))>;
}
// Straightforward patterns of last resort: a pointer with or without an
// appropriate offset.
defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
(i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
// The offset could be hidden behind an "or", of course:
defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
(i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
// Global addresses under the small-absolute model should use these
// instructions. There are ELF relocations specifically for it.
defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
(ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
ALIGN),
(ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
// External symbols that make it this far should also get standard relocations.
defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
ALIGN),
(ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
(ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
// We also want to use uimm12 instructions for local variables at the moment.
def tframeindex_XFORM : SDNodeXForm<frameindex, [{
int FI = cast<FrameIndexSDNode>(N)->getIndex();
return CurDAG->getTargetFrameIndex(FI, MVT::i64);
}]>;
defm : uimm12_pats<(i64 frameindex:$Rn),
(tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
// These can be much simpler than uimm12 because we don't have to change the
// operand type (e.g. LDURB and LDURH take the same operands).
multiclass simm9_pats<dag address, dag Base, dag Offset> {
defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
f128>;
def : Pat<(i64 (zextloadi32 address)),
(SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
def : Pat<(truncstorei32 i64:$Rt, address),
(LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
}
defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
(i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
(i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
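// For example, an i64 load from [x1, #-8] cannot use the scaled uimm12 form
// (the offset is negative), so it matches here instead and becomes
// "ldur x0, [x1, #-8]" via LS64_LDUR.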
//===------------------------------
// 3. Register offset patterns
//===------------------------------
// Atomic patterns can be shared between integer operations of all sizes, a
// quick multiclass here allows reuse.
multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
dag Offset, dag Extend, dag address,
ValueType transty, ValueType sty> {
def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
(LOAD Base, Offset, Extend)>;
def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
(STORE $Rt, Base, Offset, Extend)>;
}
// The register offset instructions take three operands giving the address,
// and have an annoying split between instructions where Rm is 32-bit and
// 64-bit. So we need a special hierarchy to describe them. Other than that the
// same operations should be supported as for simm9 and uimm12 addressing.
multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
dag Base, dag Offset, dag Extend,
dag address, ValueType sty>
: ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
def : Pat<(!cast<SDNode>(zextload # sty) address),
(LOAD Base, Offset, Extend)>;
def : Pat<(!cast<SDNode>(extload # sty) address),
(LOAD Base, Offset, Extend)>;
// For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
// register was actually set.
def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
(SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
(SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
(STORE $Rt, Base, Offset, Extend)>;
// For truncating store from 64-bits, we have to manually tell LLVM to
// ignore the high bits of the x register.
def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
(STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
}
// Next come patterns for sign-extending loads.
multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
dag address, ValueType sty> {
def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
(!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
Base, Offset, Extend)>;
def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
(!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
Base, Offset, Extend)>;
}
// and finally "natural-width" loads and stores come next.
multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
dag Base, dag Offset, dag Extend, dag address,
ValueType sty> {
def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
def : Pat<(store sty:$Rt, address),
(STORE $Rt, Base, Offset, Extend)>;
}
multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
dag Base, dag Offset, dag Extend, dag address,
ValueType sty>
: ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
dag Extend> {
defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq0, decls.pattern)),
i8>;
defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq1, decls.pattern)),
i16>;
defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq2, decls.pattern)),
i32>;
defm : ro_int_neutral_pats<
!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq2, decls.pattern)),
i32>;
defm : ro_int_neutral_pats<
!cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq3, decls.pattern)),
i64>;
defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq1, decls.pattern)),
f16>;
defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq2, decls.pattern)),
f32>;
defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq3, decls.pattern)),
f64>;
defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq4, decls.pattern)),
f128>;
defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq0, decls.pattern)),
i8>;
defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
!foreach(decls.pattern, address,
!subst(SHIFT, imm_eq1, decls.pattern)),
i16>;
def : Pat<(sextloadi32 !foreach(decls.pattern, address,
!subst(SHIFT, imm_eq2, decls.pattern))),
(!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
Base, Offset, Extend)>;
}
// Finally we're in a position to tell LLVM exactly what addresses are reachable
// using register-offset instructions. Essentially a base plus a possibly
// extended, possibly shifted (by access size) offset.
defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
(i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
(i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
(i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
(i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
(i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
(i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
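// For example, "ldr w0, [x1, w2, sxtw #2]" corresponds to the
// (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)) pattern above; the trailing
// (i64 N) operand presumably encodes the extend type and whether the shift is
// applied, matching the instruction definitions' Extend operand.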
//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON) Support
//
include "AArch64InstrNEON.td"
Index: head/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
===================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp (revision 265925)
@@ -1,24 +1,31 @@
//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file deals with any AArch64 specific requirements on object files.
//
//===----------------------------------------------------------------------===//
#include "AArch64TargetObjectFile.h"
using namespace llvm;
void
AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
+
+void
+AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
Index: head/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
===================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h (revision 265924)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h (revision 265925)
@@ -1,31 +1,35 @@
//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file deals with any AArch64 specific requirements on object files.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
- /// AArch64LinuxTargetObjectFile - This implementation is used for linux
- /// AArch64.
+ /// AArch64ElfTargetObjectFile - This implementation is used for ELF
+ /// AArch64 targets.
+ class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
};
} // end namespace llvm
#endif
Index: head/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
===================================================================
--- head/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp (revision 265925)
@@ -1,711 +1,717 @@
//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The Cortex-A15 processor employs a tracking scheme in its register renaming
// in order to process each instruction's micro-ops speculatively and
// out-of-order with appropriate forwarding. The ARM architecture allows VFP
// instructions to read and write 32-bit S-registers. Each S-register
// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
//
// Several instruction patterns can be used to provide this capability, and
// some of them offer higher performance than other, potentially more direct,
// patterns, specifically when one micro-op reads a D-register operand that has
// recently been written as one or more S-register results.
//
// This file defines a pre-regalloc pass which looks for SPR producers which
// are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "a15-sd-optimizer"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <set>
using namespace llvm;
namespace {
struct A15SDOptimizer : public MachineFunctionPass {
static char ID;
A15SDOptimizer() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM A15 S->D optimizer";
}
private:
const ARMBaseInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
bool runOnInstruction(MachineInstr *MI);
//
// Instruction builder helpers
//
unsigned createDupLane(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned Reg, unsigned Lane,
bool QPR=false);
unsigned createExtractSubreg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned DReg, unsigned Lane,
const TargetRegisterClass *TRC);
unsigned createVExt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned Ssub0, unsigned Ssub1);
unsigned createRegSequence(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned Reg1, unsigned Reg2);
unsigned createInsertSubreg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL, unsigned DReg, unsigned Lane,
unsigned ToInsert);
unsigned createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL);
//
// Various property checkers
//
bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
bool hasPartialWrite(MachineInstr *MI);
SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
unsigned getDPRLaneFromSPR(unsigned SReg);
//
// Methods used for getting the definitions of partial registers
//
MachineInstr *elideCopies(MachineInstr *MI);
void elideCopiesAndPHIs(MachineInstr *MI,
SmallVectorImpl<MachineInstr*> &Outs);
//
// Pattern optimization methods
//
unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
unsigned optimizeSDPattern(MachineInstr *MI);
unsigned getPrefSPRLane(unsigned SReg);
//
// Sanitizing method - used to make sure we don't leave dead code around.
//
void eraseInstrWithNoUses(MachineInstr *MI);
//
// A map used to track the changes done by this pass.
//
std::map<MachineInstr*, unsigned> Replacements;
std::set<MachineInstr *> DeadInstr;
};
char A15SDOptimizer::ID = 0;
} // end anonymous namespace
// Returns true if this operand is a use of a register of class TRC.
bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
const TargetRegisterClass *TRC) {
if (!MO.isReg())
return false;
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
else
return TRC->contains(Reg);
}
unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
&ARM::DPRRegClass);
if (DReg != ARM::NoRegister) return ARM::ssub_1;
return ARM::ssub_0;
}
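// For example, s1 is the ssub_1 half of d0, so getDPRLaneFromSPR(s1)
// returns ARM::ssub_1, while s0 (the ssub_0 half of d0) falls through
// to ARM::ssub_0.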
// Get the subreg type that is most likely to be coalesced
// for an SPR register that will be used in VDUP32d pseudo.
unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
if (!TRI->isVirtualRegister(SReg))
return getDPRLaneFromSPR(SReg);
MachineInstr *MI = MRI->getVRegDef(SReg);
if (!MI) return ARM::ssub_0;
MachineOperand *MO = MI->findRegisterDefOperand(SReg);
if (!MO) return ARM::ssub_0;
assert(MO->isReg() && "Non register operand found!");
if (MI->isCopy() && usesRegClass(MI->getOperand(1),
&ARM::SPRRegClass)) {
SReg = MI->getOperand(1).getReg();
}
if (TargetRegisterInfo::isVirtualRegister(SReg)) {
if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
return ARM::ssub_0;
}
return getDPRLaneFromSPR(SReg);
}
// MI is known to be dead. Figure out what instructions
// are also made dead by this and mark them for removal.
void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
SmallVector<MachineInstr *, 8> Front;
DeadInstr.insert(MI);
DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
Front.push_back(MI);
while (Front.size() != 0) {
MI = Front.back();
Front.pop_back();
// MI is already known to be dead. We need to see
// if other instructions can also be removed.
for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
MachineOperand &MO = MI->getOperand(i);
if ((!MO.isReg()) || (!MO.isUse()))
continue;
unsigned Reg = MO.getReg();
if (!TRI->isVirtualRegister(Reg))
continue;
MachineOperand *Op = MI->findRegisterDefOperand(Reg);
if (!Op)
continue;
MachineInstr *Def = Op->getParent();
// We don't need to do anything if we have already marked
// this instruction as being dead.
if (DeadInstr.find(Def) != DeadInstr.end())
continue;
// Check if all the uses of this instruction are marked as
// dead. If so, we can also mark this instruction as being
// dead.
bool IsDead = true;
for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
MachineOperand &MODef = Def->getOperand(j);
if ((!MODef.isReg()) || (!MODef.isDef()))
continue;
unsigned DefReg = MODef.getReg();
if (!TRI->isVirtualRegister(DefReg)) {
IsDead = false;
break;
}
for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg),
EE = MRI->use_end();
II != EE; ++II) {
// We don't care about self references.
if (&*II == Def)
continue;
if (DeadInstr.find(&*II) == DeadInstr.end()) {
IsDead = false;
break;
}
}
}
if (!IsDead) continue;
DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
DeadInstr.insert(Def);
}
}
}
// Creates the more optimized patterns and generally does all the code
// transformations in this pass.
unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
if (MI->isCopy()) {
return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
}
if (MI->isInsertSubreg()) {
unsigned DPRReg = MI->getOperand(1).getReg();
unsigned SPRReg = MI->getOperand(2).getReg();
if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
if (DPRMI && SPRMI) {
// See if the first operand of this insert_subreg is IMPLICIT_DEF
MachineInstr *ECDef = elideCopies(DPRMI);
if (ECDef != 0 && ECDef->isImplicitDef()) {
// Another corner case - if we're inserting something that is purely
// a subreg copy of a DPR, just use that DPR.
MachineInstr *EC = elideCopies(SPRMI);
// Is it a subreg copy of ssub_0?
if (EC && EC->isCopy() &&
EC->getOperand(1).getSubReg() == ARM::ssub_0) {
DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
// Find the thing we're subreg copying out of - is it of the same
// regclass as DPRMI? (i.e. a DPR or QPR).
unsigned FullReg = SPRMI->getOperand(1).getReg();
const TargetRegisterClass *TRC =
MRI->getRegClass(MI->getOperand(1).getReg());
if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
DEBUG(dbgs() << "Subreg copy is compatible - returning ");
DEBUG(dbgs() << PrintReg(FullReg) << "\n");
eraseInstrWithNoUses(MI);
return FullReg;
}
}
return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
}
}
}
return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
}
if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
&ARM::SPRRegClass)) {
// See if all bar one of the operands are IMPLICIT_DEF and insert the
// optimizer pattern accordingly.
unsigned NumImplicit = 0, NumTotal = 0;
unsigned NonImplicitReg = ~0U;
for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
if (!MI->getOperand(I).isReg())
continue;
++NumTotal;
unsigned OpReg = MI->getOperand(I).getReg();
if (!TRI->isVirtualRegister(OpReg))
break;
MachineInstr *Def = MRI->getVRegDef(OpReg);
if (!Def)
break;
if (Def->isImplicitDef())
++NumImplicit;
else
NonImplicitReg = MI->getOperand(I).getReg();
}
if (NumImplicit == NumTotal - 1)
return optimizeAllLanesPattern(MI, NonImplicitReg);
else
return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
}
assert(0 && "Unhandled update pattern!");
return 0;
}
// Return true if this MachineInstr inserts a scalar (SPR) value into
// a D or Q register.
bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
// The only way we can do a partial register update is through a COPY,
// INSERT_SUBREG or REG_SEQUENCE.
if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
return true;
if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
&ARM::SPRRegClass))
return true;
if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
return true;
return false;
}
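// For illustration (hypothetical MIR, not from a real test case), the
// three partial-write shapes recognized above look roughly like:
//
//   %d0 = COPY %s1                                   ; full SPR copy
//   %d0 = INSERT_SUBREG %d1, %s2, ssub_0             ; lane insert
//   %d0 = REG_SEQUENCE %s1, ssub_0, %s2, ssub_1      ; merge two SPRs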
// Looks through full copies to get the instruction that defines the input
// operand for MI.
MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
if (!MI->isFullCopy())
return MI;
if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
return NULL;
MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!Def)
return NULL;
return elideCopies(Def);
}
// Look through full copies and PHIs to get the set of non-copy MachineInstrs
// that can produce MI.
void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
SmallVectorImpl<MachineInstr*> &Outs) {
// Looking through PHIs may create loops so we need to track what
// instructions we have visited before.
std::set<MachineInstr *> Reached;
SmallVector<MachineInstr *, 8> Front;
Front.push_back(MI);
while (Front.size() != 0) {
MI = Front.back();
Front.pop_back();
// If we have already explored this MachineInstr, ignore it.
if (Reached.find(MI) != Reached.end())
continue;
Reached.insert(MI);
if (MI->isPHI()) {
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
unsigned Reg = MI->getOperand(I).getReg();
if (!TRI->isVirtualRegister(Reg)) {
continue;
}
MachineInstr *NewMI = MRI->getVRegDef(Reg);
if (!NewMI)
continue;
Front.push_back(NewMI);
}
} else if (MI->isFullCopy()) {
if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
continue;
MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!NewMI)
continue;
Front.push_back(NewMI);
} else {
DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
Outs.push_back(MI);
}
}
}
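// Example of why the Reached set matters (illustrative): a loop-carried
// PHI such as
//   %a = PHI [ %init, entry ], [ %b, loop ]
//   %b = COPY %a
// would otherwise send the walk around the %a <-> %b cycle forever.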
// Return the DPR virtual registers that are read by this machine instruction
// (if any).
SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
MI->isKill())
return SmallVector<unsigned, 8>();
SmallVector<unsigned, 8> Defs;
for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
if (!usesRegClass(MO, &ARM::DPRRegClass) &&
- !usesRegClass(MO, &ARM::QPRRegClass))
+ !usesRegClass(MO, &ARM::QPRRegClass) &&
+ !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
continue;
Defs.push_back(MO.getReg());
}
return Defs;
}
// Duplicates one lane of a register into all lanes of a new DPR (or QPR)
// register by using a VDUP.
unsigned
A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned Reg, unsigned Lane, bool QPR) {
unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
&ARM::DPRRegClass);
AddDefaultPred(BuildMI(MBB,
InsertBefore,
DL,
TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
Out)
.addReg(Reg)
.addImm(Lane));
return Out;
}
// Extracts the given lane of a DPR into a new register of class TRC by
// emitting a subregister COPY.
unsigned
A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned DReg, unsigned Lane,
const TargetRegisterClass *TRC) {
unsigned Out = MRI->createVirtualRegister(TRC);
BuildMI(MBB,
InsertBefore,
DL,
TII->get(TargetOpcode::COPY), Out)
.addReg(DReg, 0, Lane);
return Out;
}
// Takes two DPR registers and merges them into a QPR by using a REG_SEQUENCE.
unsigned
A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned Reg1, unsigned Reg2) {
unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
BuildMI(MBB,
InsertBefore,
DL,
TII->get(TargetOpcode::REG_SEQUENCE), Out)
.addReg(Reg1)
.addImm(ARM::dsub_0)
.addReg(Reg2)
.addImm(ARM::dsub_1);
return Out;
}
// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
// and merges them into one DPR register.
unsigned
A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL,
unsigned Ssub0, unsigned Ssub1) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
AddDefaultPred(BuildMI(MBB,
InsertBefore,
DL,
TII->get(ARM::VEXTd32), Out)
.addReg(Ssub0)
.addReg(Ssub1)
.addImm(1));
return Out;
}
unsigned
A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL, unsigned DReg, unsigned Lane,
unsigned ToInsert) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
BuildMI(MBB,
InsertBefore,
DL,
TII->get(TargetOpcode::INSERT_SUBREG), Out)
.addReg(DReg)
.addReg(ToInsert)
.addImm(Lane);
return Out;
}
unsigned
A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
BuildMI(MBB,
InsertBefore,
DL,
TII->get(TargetOpcode::IMPLICIT_DEF), Out);
return Out;
}
// This function inserts instructions in order to optimize interactions between
// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
// lanes, and then using VEXT instructions to recompose the result.
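// As a rough example (hypothetical virtual registers), a DPR input %d
// becomes:
//
//   %a = VDUPLN32d %d, 0       ; broadcast lane 0
//   %b = VDUPLN32d %d, 1       ; broadcast lane 1
//   %r = VEXTd32 %a, %b, 1     ; [d.0, d.1] - the original lane order
//
// so every lane of %r is produced by a full-width D write.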
unsigned
A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
MachineBasicBlock::iterator InsertPt(MI);
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock &MBB = *MI->getParent();
InsertPt++;
unsigned Out;
- if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ // DPair has the same length as QPR and also has two DPRs as subreg.
+ // Treat DPair as QPR.
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
ARM::dsub_0, &ARM::DPRRegClass);
unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
ARM::dsub_1, &ARM::DPRRegClass);
unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
} else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
} else {
assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
"Found unexpected regclass!");
unsigned PrefLane = getPrefSPRLane(Reg);
unsigned Lane;
switch (PrefLane) {
case ARM::ssub_0: Lane = 0; break;
case ARM::ssub_1: Lane = 1; break;
default: llvm_unreachable("Unknown preferred lane!");
}
- bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+ // Treat DPair as QPR
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
+ usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
Out = createImplicitDef(MBB, InsertPt, DL);
Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
eraseInstrWithNoUses(MI);
}
return Out;
}
bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// We look for instructions that write S registers that are then read as
// D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
// REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
// merge two SPR values to form a DPR register. In order to avoid false
// positives we make sure that there is an SPR producer so we look past
// COPY and PHI nodes to find it.
//
// The best code pattern for when an SPR producer is going to be used by a
// DPR or QPR consumer depends on whether the other lanes of the
// corresponding DPR/QPR are currently defined.
//
// We can handle these efficiently, depending on the type of
// pseudo-instruction that is producing the pattern:
//
// * COPY: * VDUP all lanes and merge the results together
// using VEXTs.
//
// * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
// lane, and the other lane(s) of the DPR/QPR register
// that we are inserting in are undefined, use the
// original DPR/QPR value.
// * Otherwise, fall back on the same strategy as COPY.
//
// * REG_SEQUENCE: * If all except one of the input operands are
// IMPLICIT_DEFs, insert the VDUP pattern for just the
// defined input operand
// * Otherwise, fall back on the same strategy as COPY.
//
// First, get all the reads of D-registers done by this instruction.
SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
bool Modified = false;
for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
I != E; ++I) {
// Follow the def-use chain for this DPR through COPYs, and also through
// PHIs (which are essentially multi-way COPYs). It is because of PHIs that
// we can end up with multiple defs of this DPR.
SmallVector<MachineInstr *, 8> DefSrcs;
if (!TRI->isVirtualRegister(*I))
continue;
MachineInstr *Def = MRI->getVRegDef(*I);
if (!Def)
continue;
elideCopiesAndPHIs(Def, DefSrcs);
for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(),
EE = DefSrcs.end(); II != EE; ++II) {
MachineInstr *MI = *II;
// If we've already analyzed and replaced this operand, don't do
// anything.
if (Replacements.find(MI) != Replacements.end())
continue;
// Now, work out if the instruction causes a SPR->DPR dependency.
if (!hasPartialWrite(MI))
continue;
// Collect all the uses of this MI's DPR def for updating later.
SmallVector<MachineOperand*, 8> Uses;
unsigned DPRDefReg = MI->getOperand(0).getReg();
for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
E = MRI->use_end(); I != E; ++I)
Uses.push_back(&I.getOperand());
// We can optimize this.
unsigned NewReg = optimizeSDPattern(MI);
if (NewReg != 0) {
Modified = true;
for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
// Make sure to constrain the register class of the new register to
// match what we're replacing. Otherwise we can optimize a DPR_VFP2
// reference into a plain DPR, and that will end poorly. NewReg is
// always virtual here, so there will always be a matching subclass
// to find.
MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
DEBUG(dbgs() << "Replacing operand "
<< **I << " with "
<< PrintReg(NewReg) << "\n");
(*I)->substVirtReg(NewReg, 0, *TRI);
}
}
Replacements[MI] = NewReg;
}
}
return Modified;
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
DeadInstr.clear();
Replacements.clear();
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
MI != ME;) {
Modified |= runOnInstruction(MI++);
}
}
for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
E = DeadInstr.end();
I != E; ++I) {
(*I)->eraseFromParent();
}
return Modified;
}
FunctionPass *llvm::createA15SDOptimizerPass() {
return new A15SDOptimizer();
}
Index: head/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- head/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (revision 265925)
@@ -1,4350 +1,4352 @@
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
using namespace llvm;
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
cl::Hidden, cl::init(12),
cl::desc("Clearance before partial register updates"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
uint16_t MulOpc; // Expanded multiplication opcode
uint16_t AddSubOpc; // Expanded add / sub opcode
bool NegAcc; // True if the acc is negated before the add / sub.
bool HasLane; // True if instruction has an extra "lane" operand.
};
static const ARM_MLxEntry ARM_MLxTable[] = {
// MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
// fp scalar ops
{ ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
{ ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
{ ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
{ ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
{ ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
{ ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
{ ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
{ ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
// fp SIMD ops
{ ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
{ ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
{ ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
{ ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
{ ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
{ ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
{ ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
{ ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
};
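// Illustrative expansion (hypothetical registers), per the first table
// entry: when the MLx hazard heuristics reject the fused form,
//   VMLAS s0, s1, s2        ; s0 += s1 * s2
// is split into
//   VMULS sT, s1, s2
//   VADDS s0, s0, sT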
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
: ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
Subtarget(STI) {
for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
assert(false && "Duplicated entries?");
MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
}
}
// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
if (usePreRAHazardRecognizer()) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const {
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
return NULL;
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return NULL;
case ARMII::IndexModePre:
isPre = true;
break;
case ARMII::IndexModePost:
break;
}
// Try splitting an indexed load/store to an un-indexed one plus an add/sub
// operation.
unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
if (MemOpc == 0)
return NULL;
MachineInstr *UpdateMI = NULL;
MachineInstr *MemMI = NULL;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
unsigned WBReg = WB.getReg();
unsigned BaseReg = Base.getReg();
unsigned OffReg = Offset.getReg();
unsigned OffImm = MI->getOperand(NumOps-2).getImm();
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
switch (AddrMode) {
default: llvm_unreachable("Unknown indexed op!");
case ARMII::AddrMode2: {
bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
if (OffReg == 0) {
if (ARM_AM::getSOImmVal(Amt) == -1)
// Can't encode it in a so_imm operand. This transformation will
// add more than 1 instruction. Abandon!
return NULL;
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg).addImm(Amt)
.addImm(Pred).addReg(0).addReg(0);
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
.addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
.addImm(Pred).addReg(0).addReg(0);
} else
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg).addReg(OffReg)
.addImm(Pred).addReg(0).addReg(0);
break;
}
case ARMII::AddrMode3 : {
bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM3Offset(OffImm);
if (OffReg == 0)
// Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg).addImm(Amt)
.addImm(Pred).addReg(0).addReg(0);
else
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg).addReg(OffReg)
.addImm(Pred).addReg(0).addReg(0);
break;
}
}
std::vector<MachineInstr*> NewMIs;
if (isPre) {
if (isLoad)
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc), MI->getOperand(0).getReg())
.addReg(WBReg).addImm(0).addImm(Pred);
else
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc)).addReg(MI->getOperand(1).getReg())
.addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
NewMIs.push_back(MemMI);
NewMIs.push_back(UpdateMI);
} else {
if (isLoad)
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc), MI->getOperand(0).getReg())
.addReg(BaseReg).addImm(0).addImm(Pred);
else
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc)).addReg(MI->getOperand(1).getReg())
.addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
if (WB.isDead())
UpdateMI->getOperand(0).setIsDead();
NewMIs.push_back(UpdateMI);
NewMIs.push_back(MemMI);
}
// Transfer LiveVariables states, kill / dead info.
if (LV) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
if (MO.isDef()) {
MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
if (MO.isDead())
LV->addVirtualRegisterDead(Reg, NewMI);
}
if (MO.isUse() && MO.isKill()) {
for (unsigned j = 0; j < 2; ++j) {
// Look at the two new MI's in reverse order.
MachineInstr *NewMI = NewMIs[j];
if (!NewMI->readsRegister(Reg))
continue;
LV->addVirtualRegisterKilled(Reg, NewMI);
if (VI.removeKill(MI))
VI.Kills.push_back(NewMI);
break;
}
}
}
}
}
MFI->insert(MBBI, NewMIs[1]);
MFI->insert(MBBI, NewMIs[0]);
return NewMIs[0];
}
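// Example of the split performed above (illustrative, AddrMode2
// pre-indexed):
//   LDR r0, [r1, #4]!       ; indexed load with base write-back
// becomes
//   ADD r1, r1, #4          ; UpdateMI
//   LDR r0, [r1]            ; MemMI, un-indexed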
// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
TBB = 0;
FBB = 0;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return false; // Empty blocks are easy.
--I;
// Walk backwards from the end of the basic block until the branch is
// analyzed or we give up.
while (isPredicated(I) || I->isTerminator()) {
// Flag to be raised on unanalyzable instructions. This is useful in cases
// where we want to clean up on the end of the basic block before we bail
// out.
bool CantAnalyze = false;
// Skip over DEBUG values and predicated nonterminators.
while (I->isDebugValue() || !I->isTerminator()) {
if (I == MBB.begin())
return false;
--I;
}
if (isIndirectBranchOpcode(I->getOpcode()) ||
isJumpTableBranchOpcode(I->getOpcode())) {
// Indirect branches and jump tables can't be analyzed, but we still want
// to clean up any instructions at the tail of the basic block.
CantAnalyze = true;
} else if (isUncondBranchOpcode(I->getOpcode())) {
TBB = I->getOperand(0).getMBB();
} else if (isCondBranchOpcode(I->getOpcode())) {
// Bail out if we encounter multiple conditional branches.
if (!Cond.empty())
return true;
assert(!FBB && "FBB should have been null.");
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(I->getOperand(1));
Cond.push_back(I->getOperand(2));
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
CantAnalyze = !isPredicated(I);
} else {
// We encountered an unrecognized terminator. Bail out immediately.
return true;
}
// Cleanup code - to be run for unpredicated unconditional branches and
// returns.
if (!isPredicated(I) &&
(isUncondBranchOpcode(I->getOpcode()) ||
isIndirectBranchOpcode(I->getOpcode()) ||
isJumpTableBranchOpcode(I->getOpcode()) ||
I->isReturn())) {
// Forget any previous conditional branch information - it no longer applies.
Cond.clear();
FBB = 0;
// If we can modify the function, delete everything below this
// unconditional branch.
if (AllowModify) {
MachineBasicBlock::iterator DI = llvm::next(I);
while (DI != MBB.end()) {
MachineInstr *InstToDelete = DI;
++DI;
InstToDelete->eraseFromParent();
}
}
}
if (CantAnalyze)
return true;
if (I == MBB.begin())
return false;
--I;
}
// We made it past the terminators without bailing out - we must have
// analyzed this branch successfully.
return false;
}
unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return 0;
--I;
}
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (!isCondBranchOpcode(I->getOpcode()))
return 1;
// Remove the branch.
I->eraseFromParent();
return 2;
}
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
int BccOpc = !AFI->isThumbFunction()
? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
if (FBB == 0) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
}
// Two-way conditional branch.
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
Cond[0].setImm(ARMCC::getOppositeCondition(CC));
return false;
}
bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
if (MI->isBundle()) {
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
int PIdx = I->findFirstPredOperandIdx();
if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
return true;
}
return false;
}
int PIdx = MI->findFirstPredOperandIdx();
return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
unsigned Opc = MI->getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
return true;
}
int PIdx = MI->findFirstPredOperandIdx();
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setImm(Pred[0].getImm());
MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
return true;
}
return false;
}
bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const {
if (Pred1.size() > 2 || Pred2.size() > 2)
return false;
ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
if (CC1 == CC2)
return true;
switch (CC1) {
default:
return false;
case ARMCC::AL:
return true;
case ARMCC::HS:
return CC2 == ARMCC::HI;
case ARMCC::LS:
return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
case ARMCC::GE:
return CC2 == ARMCC::GT;
case ARMCC::LE:
return CC2 == ARMCC::LT;
}
}
bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
bool Found = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
(MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
Pred.push_back(MO);
Found = true;
}
}
return Found;
}
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(MI);
} else { // non-Thumb
if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
return false;
}
return true;
}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI) {
assert(JTI < JT.size());
return JT[JTI].MBBs.size();
}
/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const MachineBasicBlock &MBB = *MI->getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const MCInstrDesc &MCID = MI->getDesc();
if (MCID.getSize())
return MCID.getSize();
// If this machine instr is an inline asm, measure it.
if (MI->getOpcode() == ARM::INLINEASM)
return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
if (MI->isLabel())
return 0;
unsigned Opc = MI->getOpcode();
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
case TargetOpcode::BUNDLE:
return getInstBundleLength(MI);
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
case ARM::t2MOVTi16_ga_pcrel:
return 4;
case ARM::MOVi32imm:
case ARM::t2MOVi32imm:
return 8;
case ARM::CONSTPOOL_ENTRY:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
case ARM::Int_eh_sjlj_longjmp:
return 16;
case ARM::tInt_eh_sjlj_longjmp:
return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
return 20;
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return 12;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
case ARM::tBR_JTr:
case ARM::t2BR_JT:
case ARM::t2TBB_JT:
case ARM::t2TBH_JT: {
// These are jumptable branches, i.e. a branch followed by an inlined
// jumptable. The size is 4 + 4 * number of entries. For TBB, each
// entry is one byte; for TBH, two bytes each.
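// Worked example (illustrative): a t2TBB_JT with 5 entries is padded
// below to 6 so the next instruction stays 2-byte aligned, giving
// 6 * 1 + 4 = 10 bytes for the branch plus its inline table.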
unsigned EntrySize = (Opc == ARM::t2TBB_JT)
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != 0);
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
assert(JTI < JT.size());
// Thumb instructions are 2 byte aligned, but JT entries are 4 byte
// aligned. The assembler / linker may add 2 byte padding just before
// the JT entries. The size does not include this padding; the
// constant islands pass does separate bookkeeping for it.
// FIXME: If we know the size of the function is less than (1 << 16) * 2
// bytes, we can use 16-bit entries instead. Then there won't be an
// alignment issue.
unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
unsigned NumEntries = getNumJTEntries(JT, JTI);
if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction
// instead.
++NumEntries;
return NumEntries * EntrySize + InstSize;
}
default:
// Otherwise, pseudo-instruction sizes are zero.
return 0;
}
}
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += GetInstSizeInBytes(&*I);
}
return Size;
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))));
return;
}
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
if (SPRDest && SPRSrc)
Opc = ARM::VMOVS;
else if (GPRDest && SPRSrc)
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::VORRq)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
AddDefaultPred(MIB);
return;
}
// Handle register classes that require multiple instructions.
unsigned BeginIdx = 0;
unsigned SubRegs = 0;
int Spacing = 1;
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VORRq;
BeginIdx = ARM::qsub_0;
SubRegs = 2;
} else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VORRq;
BeginIdx = ARM::qsub_0;
SubRegs = 4;
// Fall back to VMOVD.
} else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 2;
} else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 3;
} else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 4;
} else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
BeginIdx = ARM::gsub_0;
SubRegs = 2;
} else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 2;
Spacing = 2;
} else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 3;
Spacing = 2;
} else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
}
assert(Opc && "Impossible reg-to-reg copy");
const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineInstrBuilder Mov;
// Copy register tuples backward when the first Dest reg overlaps with SrcReg.
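// For example (illustrative), copying DPair d1_d2 into d2_d3 forwards
// would write d2 before it is read as a source; flipping to negative
// Spacing copies d3 <- d2 first, then d2 <- d1.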
if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
Spacing = -Spacing;
}
#ifndef NDEBUG
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
DstRegs.insert(Dst);
#endif
Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
Mov = AddDefaultPred(Mov);
// MOVr can set CC.
if (Opc == ARM::MOVr)
Mov = AddDefaultCC(Mov);
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
if (KillSrc)
Mov->addRegisterKilled(SrcReg, TRI);
}
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
Align);
switch (RC->getSize()) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
AddDefaultPred(MIB);
} else {
// Fall back to the STM instruction, which has existed since the dawn of
// time.
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
.addFrameIndex(FI).addMemOperand(MMO));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO));
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
} else
llvm_unreachable("Unknown reg class!");
break;
default:
llvm_unreachable("Unknown reg class!");
}
}
unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
case ARM::STRrs:
case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isReg() &&
MI->getOperand(3).isImm() &&
MI->getOperand(2).getReg() == 0 &&
MI->getOperand(3).getImm() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::STRi12:
case ARM::t2STRi12:
case ARM::tSTRspi:
case ARM::VSTRD:
case ARM::VSTRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::VST1q64:
case ARM::VST1d64TPseudo:
case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
return MI->getOperand(2).getReg();
}
break;
case ARM::VSTMQIA:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
}
return 0;
}
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
switch (RC->getSize()) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB;
if (Subtarget.hasV5TEOps()) {
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
AddDefaultPred(MIB);
} else {
// Fall back to the LDM instruction, which has existed since the dawn of
// time.
MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
.addFrameIndex(FI).addMemOperand(MMO));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
.addFrameIndex(FI)
.addMemOperand(MMO));
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI)
.addMemOperand(MMO));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
default:
llvm_unreachable("Unknown regclass!");
}
}
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isReg() &&
MI->getOperand(3).isImm() &&
MI->getOperand(2).getReg() == 0 &&
MI->getOperand(3).getImm() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::LDRi12:
case ARM::t2LDRi12:
case ARM::tLDRspi:
case ARM::VLDRD:
case ARM::VLDRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::VLD1q64:
case ARM::VLD1d64TPseudo:
case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::VLDMQIA:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
break;
}
return 0;
}
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
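// Sketch of the intended rewrite (hypothetical registers): given
//   %S0 = COPY %S2
// where S0 and S2 are the ssub_0 halves of D0 and D1, emit
//   %D0 = VMOVD %D1
// instead, reading and writing full D registers.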
if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
// when using NEON v2f32 instructions for f32 arithmetic.
unsigned DstRegS = MI->getOperand(0).getReg();
unsigned SrcRegS = MI->getOperand(1).getReg();
if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
return false;
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
&ARM::DPRRegClass);
unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
&ARM::DPRRegClass);
if (!DstRegD || !SrcRegD)
return false;
// We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
// legal if the COPY already defines the full DstRegD, and it isn't a
// sub-register insertion.
if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
return false;
// A dead copy shouldn't show up here, but reject it just in case.
if (MI->getOperand(0).isDead())
return false;
// All clear, widen the COPY.
DEBUG(dbgs() << "widening: " << *MI);
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
// Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
if (ImpDefIdx != -1)
MI->RemoveOperand(ImpDefIdx);
// Change the opcode and operands.
MI->setDesc(get(ARM::VMOVD));
MI->getOperand(0).setReg(DstRegD);
MI->getOperand(1).setReg(SrcRegD);
AddDefaultPred(MIB);
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
MI->getOperand(1).setIsUndef();
MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
if (MI->getOperand(1).isKill()) {
MI->getOperand(1).setIsKill(false);
MI->addRegisterKilled(SrcRegS, TRI, true);
}
DEBUG(dbgs() << "replaced by: " << *MI);
return true;
}
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
MachineConstantPool *MCP = MF.getConstantPool();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
assert(MCPE.isMachineConstantPoolEntry() &&
"Expecting a machine constantpool entry!");
ARMConstantPoolValue *ACPV =
static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
unsigned PCLabelId = AFI->createPICLabelUId();
ARMConstantPoolValue *NewCPV = 0;
// FIXME: The below assumes PIC relocation model and that the function
// is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
// zero for non-PIC in ARM or Thumb. The callers are all for Thumb LDR
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
NewCPV = ARMConstantPoolConstant::
Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
ARMCP::CPValue, 4);
else if (ACPV->isExtSymbol())
NewCPV = ARMConstantPoolSymbol::
Create(MF.getFunction()->getContext(),
cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
else if (ACPV->isBlockAddress())
NewCPV = ARMConstantPoolConstant::
Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
else if (ACPV->isLSDA())
NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
ARMCP::CPLSDA, 4);
else if (ACPV->isMachineBasicBlock())
NewCPV = ARMConstantPoolMBB::
Create(MF.getFunction()->getContext(),
cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
return PCLabelId;
}
void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
default: {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
MachineFunction &MF = *MBB.getParent();
unsigned CPI = Orig->getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
DestReg)
.addConstantPoolIndex(CPI).addImm(PCLabelId);
MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
break;
}
}
}
MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
switch(Orig->getOpcode()) {
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned CPI = Orig->getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
Orig->getOperand(1).setIndex(CPI);
Orig->getOperand(2).setImm(PCLabelId);
break;
}
}
return MI;
}
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
int Opcode = MI0->getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
Opcode == ARM::tLDRpci_pic ||
Opcode == ARM::MOV_ga_dyn ||
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_dyn ||
Opcode == ARM::t2MOV_ga_pcrel) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
return false;
const MachineOperand &MO0 = MI0->getOperand(1);
const MachineOperand &MO1 = MI1->getOperand(1);
if (MO0.getOffset() != MO1.getOffset())
return false;
if (Opcode == ARM::MOV_ga_dyn ||
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_dyn ||
Opcode == ARM::t2MOV_ga_pcrel)
// Ignore the PC labels.
return MO0.getGlobal() == MO1.getGlobal();
const MachineFunction *MF = MI0->getParent()->getParent();
const MachineConstantPool *MCP = MF->getConstantPool();
int CPI0 = MO0.getIndex();
int CPI1 = MO1.getIndex();
const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
if (isARMCP0 && isARMCP1) {
ARMConstantPoolValue *ACPV0 =
static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
ARMConstantPoolValue *ACPV1 =
static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
return ACPV0->hasSameValue(ACPV1);
} else if (!isARMCP0 && !isARMCP1) {
return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
}
return false;
} else if (Opcode == ARM::PICLDR) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
return false;
unsigned Addr0 = MI0->getOperand(1).getReg();
unsigned Addr1 = MI1->getOperand(1).getReg();
if (Addr0 != Addr1) {
if (!MRI ||
!TargetRegisterInfo::isVirtualRegister(Addr0) ||
!TargetRegisterInfo::isVirtualRegister(Addr1))
return false;
// This assumes SSA form.
MachineInstr *Def0 = MRI->getVRegDef(Addr0);
MachineInstr *Def1 = MRI->getVRegDef(Addr1);
// Check if the loaded value, e.g. a constant-pool entry or a global
// address, is the same.
if (!produceSameValue(Def0, Def1, MRI))
return false;
}
for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
// %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
const MachineOperand &MO0 = MI0->getOperand(i);
const MachineOperand &MO1 = MI1->getOperand(i);
if (!MO0.isIdenticalTo(MO1))
return false;
}
return true;
}
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const {
// Don't worry about Thumb: just ARM and Thumb2.
if (Subtarget.isThumb1Only()) return false;
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
return false;
switch (Load1->getMachineOpcode()) {
default:
return false;
case ARM::LDRi12:
case ARM::LDRBi12:
case ARM::LDRD:
case ARM::LDRH:
case ARM::LDRSB:
case ARM::LDRSH:
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
switch (Load2->getMachineOpcode()) {
default:
return false;
case ARM::LDRi12:
case ARM::LDRBi12:
case ARM::LDRD:
case ARM::LDRH:
case ARM::LDRSB:
case ARM::LDRSH:
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
// Check if base addresses and chain operands match.
if (Load1->getOperand(0) != Load2->getOperand(0) ||
Load1->getOperand(4) != Load2->getOperand(4))
return false;
// Index should be Reg0.
if (Load1->getOperand(3) != Load2->getOperand(3))
return false;
// Determine the offsets.
if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
isa<ConstantSDNode>(Load2->getOperand(1))) {
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
return true;
}
return false;
}
/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
// Don't worry about Thumb: just ARM and Thumb2.
if (Subtarget.isThumb1Only()) return false;
assert(Offset2 > Offset1);
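// The divide-by-8 check below rejects pairs whose offsets are too far
// apart: e.g. (illustrative) Offset1 == 0 and Offset2 == 520 gives
// 520 / 8 == 65 > 64, so the loads are not scheduled together.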
if ((Offset2 - Offset1) / 8 > 64)
return false;
// Check if the machine opcodes are different. If they are different
// then we consider them to not be of the same base address,
// except in the case of Thumb2 byte loads where one is t2LDRBi8 and the
// other is t2LDRBi12. In that case they are considered the same because
// they are merely different encoding forms of the same basic instruction.
if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
!((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
(Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.
if (NumLoads >= 3)
return false;
return true;
}
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
// Debug info is never a scheduling boundary. It's necessary to be explicit
// due to the special treatment of IT instructions below; otherwise a
// dbg_value followed by an IT will result in the IT instruction being
// considered a scheduling hazard, which is wrong. It should be the actual
// instruction preceding the dbg_value instruction(s), just like it is
// when debug info is not present.
if (MI->isDebugValue())
return false;
// Terminators and labels can't be scheduled around.
if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
// t2IT along with all instructions following it.
// FIXME: This is a big hammer. But the alternative is to add all potential
// true and anti dependencies to IT block instructions as implicit operands
// to the t2IT instruction. The added compile time and complexity does not
// seem worth it.
MachineBasicBlock::const_iterator I = MI;
// Make sure to skip any dbg_value instructions
while (++I != MBB->end() && I->isDebugValue())
;
if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
return true;
// Don't attempt to schedule around any instruction that defines
// a stack-oriented pointer, as it's unlikely to be profitable. This
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
// Calls don't actually change the stack pointer, even if they have imp-defs.
// No ARM calling conventions change the stack pointer. (X86 calling
// conventions sometimes do).
if (!MI->isCall() && MI->definesRegister(ARM::SP))
return true;
return false;
}
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
const BranchProbability &Probability) const {
if (!NumCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
unsigned UnpredCost = Probability.getNumerator() * NumCycles;
UnpredCost /= Probability.getDenominator();
UnpredCost += 1; // The branch itself
UnpredCost += Subtarget.getMispredictionPenalty() / 10;
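// Illustrative example (values assumed, not taken from a real subtarget):
// with NumCycles == 2, ExtraPredCycles == 1, Probability == 1/2 and a
// misprediction penalty of 10 cycles, UnpredCost == 2*1/2 + 1 + 10/10 == 3,
// so predication is profitable since 2 + 1 <= 3.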
return (NumCycles + ExtraPredCycles) <= UnpredCost;
}
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned TCycles, unsigned TExtra,
MachineBasicBlock &FMBB,
unsigned FCycles, unsigned FExtra,
const BranchProbability &Probability) const {
if (!TCycles || !FCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
unsigned TUnpredCost = Probability.getNumerator() * TCycles;
TUnpredCost /= Probability.getDenominator();
uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
unsigned FUnpredCost = Comp * FCycles;
FUnpredCost /= Probability.getDenominator();
unsigned UnpredCost = TUnpredCost + FUnpredCost;
UnpredCost += 1; // The branch itself
UnpredCost += Subtarget.getMispredictionPenalty() / 10;
return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}
bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
// Reduce false anti-dependencies to let Swift's out-of-order execution
// engine do its thing.
return Subtarget.isSwift();
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
int PIdx = MI->findFirstPredOperandIdx();
if (PIdx == -1) {
PredReg = 0;
return ARMCC::AL;
}
PredReg = MI->getOperand(PIdx+1).getReg();
return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}
int llvm::getMatchingCondBranchOpcode(int Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
if (Opc == ARM::tB)
return ARM::tBcc;
if (Opc == ARM::t2B)
return ARM::t2Bcc;
llvm_unreachable("Unknown unconditional branch opcode!");
}
/// commuteInstruction - Handle commutable instructions.
MachineInstr *
ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
switch (MI->getOpcode()) {
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
// MOVCC can be commuted by inverting the condition.
unsigned PredReg = 0;
ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return NULL;
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (!MI)
return NULL;
// After swapping the MOVCC operands, also invert the condition.
MI->getOperand(MI->findFirstPredOperandIdx())
.setImm(ARMCC::getOppositeCondition(CC));
return MI;
}
}
return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return 0;
if (!MRI.hasOneNonDBGUse(Reg))
return 0;
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return 0;
// MI is folded into the MOVCC by predicating it.
if (!MI->isPredicable())
return 0;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
// Reject frame index operands; PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return 0;
if (!MO.isReg())
continue;
// MI can't have any tied operands, that would conflict with predication.
if (MO.isTied())
return 0;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
return 0;
if (MO.isDef() && !MO.isDead())
return 0;
}
bool DontMoveAcrossStores = true;
if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
return 0;
return MI;
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
// MOVCC operands:
// 0: Def.
// 1: True use.
// 2: False use.
// 3: Condition code.
// 4: CPSR use.
TrueOp = 1;
FalseOp = 2;
Cond.push_back(MI->getOperand(3));
Cond.push_back(MI->getOperand(4));
// We can always fold a def.
Optimizable = true;
return false;
}
MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
if (!DefMI)
return 0;
// Find new register class to use.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
unsigned DestReg = MI->getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return 0;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
NewMI.addOperand(DefMI->getOperand(i));
unsigned CondCode = MI->getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
NewMI.addOperand(MI->getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
AddDefaultCC(NewMI);
// The output register value when the predicate is false is an implicit
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
FalseReg.setImplicit();
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
}
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
uint16_t PseudoOpc;
uint16_t MachineOpc;
};
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::ADDSri, ARM::ADDri},
{ARM::ADDSrr, ARM::ADDrr},
{ARM::ADDSrsi, ARM::ADDrsi},
{ARM::ADDSrsr, ARM::ADDrsr},
{ARM::SUBSri, ARM::SUBri},
{ARM::SUBSrr, ARM::SUBrr},
{ARM::SUBSrsi, ARM::SUBrsi},
{ARM::SUBSrsr, ARM::SUBrsr},
{ARM::RSBSri, ARM::RSBri},
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
{ARM::t2ADDSrs, ARM::t2ADDrs},
{ARM::t2SUBSri, ARM::t2SUBri},
{ARM::t2SUBSrr, ARM::t2SUBrr},
{ARM::t2SUBSrs, ARM::t2SUBrs},
{ARM::t2RSBSri, ARM::t2RSBri},
{ARM::t2RSBSrs, ARM::t2RSBrs},
};
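// Illustrative: convertAddSubFlagsOpcode(ARM::ADDSri) returns ARM::ADDri,
// and any opcode not in the table above returns 0.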
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
return AddSubFlagsOpcodeMap[i].MachineOpc;
return 0;
}
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
.addReg(BaseReg, RegState::Kill)
.addImm((unsigned)Pred).addReg(PredReg).addReg(0)
.setMIFlags(MIFlags);
return;
}
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
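// Decompose NumBytes into a sequence of rotated 8-bit SO-immediates. For
// example (illustrative), 0x1004 is not a single SO-immediate, so it is
// emitted as two ADDri/SUBri instructions adding 0x4 and 0x1000.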
while (NumBytes) {
unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
assert(ThisVal && "Didn't extract field correctly");
// We will handle these bits from offset, clear them.
NumBytes &= ~ThisVal;
assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
// Build the new ADD / SUB.
unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg, RegState::Kill).addImm(ThisVal)
.addImm((unsigned)Pred).addReg(PredReg).addReg(0)
.setMIFlags(MIFlags);
BaseReg = DestReg;
}
}
bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, so it is really only a benefit for code size.
if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
// instead. We can't modify those so make sure we're dealing with an
// instruction we understand.
bool IsPop = isPopOpcode(MI->getOpcode());
bool IsPush = isPushOpcode(MI->getOpcode());
if (!IsPush && !IsPop)
return false;
bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
MI->getOpcode() == ARM::VLDMDIA_UPD;
bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
MI->getOpcode() == ARM::tPOP ||
MI->getOpcode() == ARM::tPOP_RET;
assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
MI->getOperand(1).getReg() == ARM::SP)) &&
"trying to fold sp update into non-sp-updating push/pop");
// The VFP push & pop act on D-registers, so we can only correctly fold an
// adjustment that is a multiple of 8 bytes. Similarly, each GPR is 4 bytes.
// Don't try if this is violated.
if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
return false;
// ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
// pred) so the list starts at 4. Thumb1 starts after the predicate.
int RegListIdx = IsT1PushPop ? 2 : 4;
// Calculate the space we'll need in terms of registers.
unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
unsigned RD0Reg, RegsNeeded;
if (IsVFPPushPop) {
RD0Reg = ARM::D0;
RegsNeeded = NumBytes / 8;
} else {
RD0Reg = ARM::R0;
RegsNeeded = NumBytes / 4;
}
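// For example (illustrative): folding "sub sp, sp, #8" into a GPR push
// needs RegsNeeded == 2 extra registers, while a VFP push needs a single
// extra D-register for the same 8 bytes.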
// We're going to have to strip all list operands off before
// re-adding them since the order matters, so save the existing ones
// for later.
SmallVector<MachineOperand, 4> RegList;
for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
RegList.push_back(MI->getOperand(i));
MachineBasicBlock *MBB = MI->getParent();
const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
// Now try to find enough space in the reglist to allocate NumBytes.
for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
--CurReg) {
if (!IsPop) {
// Pushing any register is completely harmless; mark the
// register involved as undef since we don't care about it in
// the slightest.
RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
false, false, true));
--RegsNeeded;
continue;
}
// However, we can only pop an extra register if it's not live. For
// registers live within the function we might clobber a return value
// register; the other way a register can be live here is if it's
// callee-saved.
if (isCalleeSavedRegister(CurReg, CSRegs) ||
MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
MachineBasicBlock::LQR_Dead) {
// VFP pops don't allow holes in the register list, so any skip is fatal
// for our transformation. GPR pops do, so we should just keep looking.
if (IsVFPPushPop)
return false;
else
continue;
}
// Mark the unimportant registers as <def,dead> in the POP.
RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
true));
--RegsNeeded;
}
if (RegsNeeded > 0)
return false;
// Finally we know we can profitably perform the optimisation so go
// ahead: strip all existing registers off and add them back again
// in the right order.
for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
MI->RemoveOperand(i);
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
for (int i = RegList.size() - 1; i >= 0; --i)
MIB.addOperand(RegList[i]);
return true;
}
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
// Memory operands in inline assembly always use AddrMode2.
if (Opcode == ARM::INLINEASM)
AddrMode = ARMII::AddrMode2;
if (Opcode == ARM::ADDri) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
if (Offset == 0) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::MOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.RemoveOperand(FrameRegIdx+1);
Offset = 0;
return true;
} else if (Offset < 0) {
Offset = -Offset;
isSub = true;
MI.setDesc(TII.get(ARM::SUBri));
}
// Common case: small offset, fits into instruction.
if (ARM_AM::getSOImmVal(Offset) != -1) {
// Replace the FrameIndex with sp / fp
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
Offset = 0;
return true;
}
// Otherwise, pull as much of the immediate into this ADDri/SUBri
// as possible.
unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
// We will handle these bits from offset, clear them.
Offset &= ~ThisImmVal;
// Get the properly encoded SOImmVal field.
assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
"Bit extraction didn't work?");
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
} else {
unsigned ImmIdx = 0;
int InstrOffs = 0;
unsigned NumBits = 0;
unsigned Scale = 1;
switch (AddrMode) {
case ARMII::AddrMode_i12: {
ImmIdx = FrameRegIdx + 1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = 12;
break;
}
case ARMII::AddrMode2: {
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 12;
break;
}
case ARMII::AddrMode3: {
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 8;
break;
}
case ARMII::AddrMode4:
case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
case ARMII::AddrMode5: {
ImmIdx = FrameRegIdx+1;
InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 8;
Scale = 4;
break;
}
default:
llvm_unreachable("Unsupported addressing mode!");
}
Offset += InstrOffs * Scale;
assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
if (Offset < 0) {
Offset = -Offset;
isSub = true;
}
// Attempt to fold the address computation if the opcode has offset bits.
if (NumBits > 0) {
// Common case: small offset, fits into instruction.
MachineOperand &ImmOp = MI.getOperand(ImmIdx);
int ImmedOffset = Offset / Scale;
unsigned Mask = (1 << NumBits) - 1;
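// Illustrative: with NumBits == 12 and Scale == 1 (AddrMode_i12),
// Mask == 4095; an offset of 4100 leaves 4 encoded in the instruction
// and 4096 for the caller to materialize separately.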
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with sp
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// FIXME: When addrmode2 goes away, this will simplify (like the
// T2 version), as the LDR.i12 versions don't need the encoding
// tricks for the offset value.
if (isSub) {
if (AddrMode == ARMII::AddrMode_i12)
ImmedOffset = -ImmedOffset;
else
ImmedOffset |= 1 << NumBits;
}
ImmOp.ChangeToImmediate(ImmedOffset);
Offset = 0;
return true;
}
// Otherwise, it didn't fit. Pull in what we can to simplify the immed.
ImmedOffset = ImmedOffset & Mask;
if (isSub) {
if (AddrMode == ARMII::AddrMode_i12)
ImmedOffset = -ImmedOffset;
else
ImmedOffset |= 1 << NumBits;
}
ImmOp.ChangeToImmediate(ImmedOffset);
Offset &= ~(Mask*Scale);
}
}
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::
analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
int &CmpMask, int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
SrcReg = MI->getOperand(0).getReg();
SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
SrcReg = MI->getOperand(0).getReg();
SrcReg2 = MI->getOperand(1).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
case ARM::TSTri:
case ARM::t2TSTri:
SrcReg = MI->getOperand(0).getReg();
SrcReg2 = 0;
CmpMask = MI->getOperand(1).getImm();
CmpValue = 0;
return true;
}
return false;
}
/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
int CmpMask, bool CommonUse) {
switch (MI->getOpcode()) {
case ARM::ANDri:
case ARM::t2ANDri:
if (CmpMask != MI->getOperand(2).getImm())
return false;
if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
return true;
break;
case ARM::COPY: {
// Walk down one instruction which is potentially an 'and'.
const MachineInstr &Copy = *MI;
MachineBasicBlock::iterator AND(
llvm::next(MachineBasicBlock::iterator(MI)));
if (AND == MI->getParent()->end()) return false;
MI = AND;
return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
CmpMask, true);
}
}
return false;
}
/// getSwappedCondition - assume the flags are set by MI(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by MI(b,a).
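/// For example (illustrative): if a user of the flags from CMP(a,b) checked
/// GE, then after the operands are swapped to CMP(b,a) it must check LE.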
inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
switch (CC) {
default: return ARMCC::AL;
case ARMCC::EQ: return ARMCC::EQ;
case ARMCC::NE: return ARMCC::NE;
case ARMCC::HS: return ARMCC::LS;
case ARMCC::LO: return ARMCC::HI;
case ARMCC::HI: return ARMCC::LO;
case ARMCC::LS: return ARMCC::HS;
case ARMCC::GE: return ARMCC::LE;
case ARMCC::LT: return ARMCC::GT;
case ARMCC::GT: return ARMCC::LT;
case ARMCC::LE: return ARMCC::GE;
}
}
/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// This function can be extended later on.
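/// For example (illustrative): in the sequence
///   sub r2, r0, r1
///   cmp r0, r1
/// the CMP is redundant once the SUB is converted to a flag-setting SUBS.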
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
unsigned SrcReg2, int ImmValue,
MachineInstr *OI) {
if ((CmpI->getOpcode() == ARM::CMPrr ||
CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::SUBrr ||
OI->getOpcode() == ARM::t2SUBrr) &&
((OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getReg() == SrcReg2) ||
(OI->getOperand(1).getReg() == SrcReg2 &&
OI->getOperand(2).getReg() == SrcReg)))
return true;
if ((CmpI->getOpcode() == ARM::CMPri ||
CmpI->getOpcode() == ARM::t2CMPri) &&
(OI->getOpcode() == ARM::SUBri ||
OI->getOpcode() == ARM::t2SUBri) &&
OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getImm() == ImmValue)
return true;
return false;
}
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register, or remove
/// a redundant compare instruction if an earlier instruction can set the
/// flags in the same way as the compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
MI = 0;
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
UE = MRI->use_end(); UI != UE; ++UI) {
if (UI->getParent() != CmpInstr->getParent()) continue;
MachineInstr *PotentialAND = &*UI;
if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
isPredicated(PotentialAND))
continue;
MI = PotentialAND;
break;
}
if (!MI) return false;
}
}
// Get ready to iterate backward from CmpInstr.
MachineBasicBlock::iterator I = CmpInstr, E = MI,
B = CmpInstr->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
// There are two possible candidates which can be changed to set CPSR:
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
MachineInstr *Sub = NULL;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = NULL;
else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
if (CmpInstr->getOpcode() == ARM::CMPri ||
CmpInstr->getOpcode() == ARM::t2CMPri)
MI = NULL;
else
return false;
}
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
return false;
// Check whether CmpInstr can be made redundant by the current instruction.
if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
Sub = &*I;
break;
}
if (I == B)
// The 'and' is below the comparison instruction.
return false;
}
// Return false if no candidates exist.
if (!MI && !Sub)
return false;
// The single candidate is called MI.
if (!MI) MI = Sub;
// We can't use a predicated instruction - it doesn't always write the flags.
if (isPredicated(MI))
return false;
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
case ARM::RSBri:
case ARM::RSCrr:
case ARM::RSCri:
case ARM::ADDrr:
case ARM::ADDri:
case ARM::ADCrr:
case ARM::ADCri:
case ARM::SUBrr:
case ARM::SUBri:
case ARM::SBCrr:
case ARM::SBCri:
case ARM::t2RSBri:
case ARM::t2ADDrr:
case ARM::t2ADDri:
case ARM::t2ADCrr:
case ARM::t2ADCri:
case ARM::t2SUBrr:
case ARM::t2SUBri:
case ARM::t2SBCrr:
case ARM::t2SBCri:
case ARM::ANDrr:
case ARM::ANDri:
case ARM::t2ANDrr:
case ARM::t2ANDri:
case ARM::ORRrr:
case ARM::ORRri:
case ARM::t2ORRrr:
case ARM::t2ORRri:
case ARM::EORrr:
case ARM::EORri:
case ARM::t2EORrr:
case ARM::t2EORri: {
// Scan forward for the use of CPSR
// When checking against MI: if its condition code requires checking of the
// V bit, then this is not safe to do.
// It is safe to remove CmpInstr if CPSR is redefined or killed.
// If we are done with the basic block, we need to check whether CPSR is
// live-out.
SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
E = CmpInstr->getParent()->end();
while (!isSafe && ++I != E) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands();
!isSafe && IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
isSafe = true;
break;
}
if (!MO.isReg() || MO.getReg() != ARM::CPSR)
continue;
if (MO.isDef()) {
isSafe = true;
break;
}
// The condition code is the operand immediately before CPSR, except for VSELs.
ARMCC::CondCodes CC;
bool IsInstrVSel = true;
switch (Instr.getOpcode()) {
default:
IsInstrVSel = false;
CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
break;
case ARM::VSELEQD:
case ARM::VSELEQS:
CC = ARMCC::EQ;
break;
case ARM::VSELGTD:
case ARM::VSELGTS:
CC = ARMCC::GT;
break;
case ARM::VSELGED:
case ARM::VSELGES:
CC = ARMCC::GE;
break;
case ARM::VSELVSS:
case ARM::VSELVSD:
CC = ARMCC::VS;
break;
}
if (Sub) {
ARMCC::CondCodes NewCC = getSwappedCondition(CC);
if (NewCC == ARMCC::AL)
return false;
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
// on CMP needs to be updated to be based on SUB.
// Push the condition code operands to OperandsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg) {
// VSel doesn't support condition code update.
if (IsInstrVSel)
return false;
OperandsToUpdate.push_back(
std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
} else
switch (CC) {
default:
// CPSR can be used multiple times; we should continue.
break;
case ARMCC::VS:
case ARMCC::VC:
case ARMCC::GE:
case ARMCC::LT:
case ARMCC::GT:
case ARMCC::LE:
return false;
}
}
}
// If CPSR is not killed nor re-defined, we should check whether it is
// live-out. If it is live-out, do not optimize.
if (!isSafe) {
MachineBasicBlock *MBB = CmpInstr->getParent();
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI)
if ((*SI)->isLiveIn(ARM::CPSR))
return false;
}
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
CmpInstr->eraseFromParent();
// Modify the condition code of operands in OperandsToUpdate.
// Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
// be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
return true;
}
}
return false;
}
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
MachineInstr *DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const {
// Fold large immediates into add, sub, or, xor.
unsigned DefOpc = DefMI->getOpcode();
if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
return false;
if (!DefMI->getOperand(1).isImm())
// Could be t2MOVi32imm <ga:xx>
return false;
if (!MRI->hasOneNonDBGUse(Reg))
return false;
const MCInstrDesc &DefMCID = DefMI->getDesc();
if (DefMCID.hasOptionalDef()) {
unsigned NumOps = DefMCID.getNumOperands();
const MachineOperand &MO = DefMI->getOperand(NumOps-1);
if (MO.getReg() == ARM::CPSR && !MO.isDead())
// If DefMI defines CPSR and it is not dead, it's obviously not safe
// to delete DefMI.
return false;
}
const MCInstrDesc &UseMCID = UseMI->getDesc();
if (UseMCID.hasOptionalDef()) {
unsigned NumOps = UseMCID.getNumOperands();
if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
// If the instruction sets the flag, do not attempt this optimization
// since it may change the semantics of the code.
return false;
}
unsigned UseOpc = UseMI->getOpcode();
unsigned NewUseOpc = 0;
uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
bool Commute = false;
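// Illustrative example (values assumed): 0x00F000F0 is not a single
// SO-immediate, but it splits into the parts 0x00F00000 and 0x000000F0,
// so "mov r1, #0x00F000F0; add r0, r2, r1" can be rewritten as two
// immediate ADDs, one per part.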
switch (UseOpc) {
default: return false;
case ARM::SUBrr:
case ARM::ADDrr:
case ARM::ORRrr:
case ARM::EORrr:
case ARM::t2SUBrr:
case ARM::t2ADDrr:
case ARM::t2ORRrr:
case ARM::t2EORrr: {
Commute = UseMI->getOperand(2).getReg() != Reg;
switch (UseOpc) {
default: break;
case ARM::SUBrr: {
if (Commute)
return false;
ImmVal = -ImmVal;
NewUseOpc = ARM::SUBri;
// Fallthrough
}
case ARM::ADDrr:
case ARM::ORRrr:
case ARM::EORrr: {
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
}
case ARM::t2SUBrr: {
if (Commute)
return false;
ImmVal = -ImmVal;
NewUseOpc = ARM::t2SUBri;
// Fallthrough
}
case ARM::t2ADDrr:
case ARM::t2ORRrr:
case ARM::t2EORrr: {
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
}
break;
}
}
}
}
unsigned OpIdx = Commute ? 2 : 1;
unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
UseMI->setDesc(get(NewUseOpc));
UseMI->getOperand(1).setReg(NewReg);
UseMI->getOperand(1).setIsKill();
UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
DefMI->eraseFromParent();
return true;
}
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: {
const MCInstrDesc &Desc = MI->getDesc();
int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
assert(UOps >= 0 && "bad # UOps");
return UOps;
}
case ARM::LDRrs:
case ARM::LDRBrs:
case ARM::STRrs:
case ARM::STRBrs: {
unsigned ShOpVal = MI->getOperand(3).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
(ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
return 1;
return 2;
}
case ARM::LDRH:
case ARM::STRH: {
if (!MI->getOperand(2).getReg())
return 1;
unsigned ShOpVal = MI->getOperand(3).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
(ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
return 1;
return 2;
}
case ARM::LDRSB:
case ARM::LDRSH:
return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
case ARM::LDRSB_POST:
case ARM::LDRSH_POST: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rm = MI->getOperand(3).getReg();
return (Rt == Rm) ? 4 : 3;
}
case ARM::LDR_PRE_REG:
case ARM::LDRB_PRE_REG: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rm = MI->getOperand(3).getReg();
if (Rt == Rm)
return 3;
unsigned ShOpVal = MI->getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
(ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
return 2;
return 3;
}
case ARM::STR_PRE_REG:
case ARM::STRB_PRE_REG: {
unsigned ShOpVal = MI->getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
(ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
return 2;
return 3;
}
case ARM::LDRH_PRE:
case ARM::STRH_PRE: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rm = MI->getOperand(3).getReg();
if (!Rm)
return 2;
if (Rt == Rm)
return 3;
return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
? 3 : 2;
}
case ARM::LDR_POST_REG:
case ARM::LDRB_POST_REG:
case ARM::LDRH_POST: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rm = MI->getOperand(3).getReg();
return (Rt == Rm) ? 3 : 2;
}
case ARM::LDR_PRE_IMM:
case ARM::LDRB_PRE_IMM:
case ARM::LDR_POST_IMM:
case ARM::LDRB_POST_IMM:
case ARM::STRB_POST_IMM:
case ARM::STRB_POST_REG:
case ARM::STRB_PRE_IMM:
case ARM::STRH_POST:
case ARM::STR_POST_IMM:
case ARM::STR_POST_REG:
case ARM::STR_PRE_IMM:
return 2;
case ARM::LDRSB_PRE:
case ARM::LDRSH_PRE: {
unsigned Rm = MI->getOperand(3).getReg();
if (Rm == 0)
return 3;
unsigned Rt = MI->getOperand(0).getReg();
if (Rt == Rm)
return 4;
unsigned ShOpVal = MI->getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
(ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
return 3;
return 4;
}
case ARM::LDRD: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rn = MI->getOperand(2).getReg();
unsigned Rm = MI->getOperand(3).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
return (Rt == Rn) ? 3 : 2;
}
case ARM::STRD: {
unsigned Rm = MI->getOperand(3).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
return 2;
}
case ARM::LDRD_POST:
case ARM::t2LDRD_POST:
return 3;
case ARM::STRD_POST:
case ARM::t2STRD_POST:
return 4;
case ARM::LDRD_PRE: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rn = MI->getOperand(3).getReg();
unsigned Rm = MI->getOperand(4).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
return (Rt == Rn) ? 4 : 3;
}
case ARM::t2LDRD_PRE: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rn = MI->getOperand(3).getReg();
return (Rt == Rn) ? 4 : 3;
}
case ARM::STRD_PRE: {
unsigned Rm = MI->getOperand(4).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
return 3;
}
case ARM::t2STRD_PRE:
return 3;
case ARM::t2LDR_POST:
case ARM::t2LDRB_POST:
case ARM::t2LDRB_PRE:
case ARM::t2LDRSBi12:
case ARM::t2LDRSBi8:
case ARM::t2LDRSBpci:
case ARM::t2LDRSBs:
case ARM::t2LDRH_POST:
case ARM::t2LDRH_PRE:
case ARM::t2LDRSBT:
case ARM::t2LDRSB_POST:
case ARM::t2LDRSB_PRE:
case ARM::t2LDRSH_POST:
case ARM::t2LDRSH_PRE:
case ARM::t2LDRSHi12:
case ARM::t2LDRSHi8:
case ARM::t2LDRSHpci:
case ARM::t2LDRSHs:
return 2;
case ARM::t2LDRDi8: {
unsigned Rt = MI->getOperand(0).getReg();
unsigned Rn = MI->getOperand(2).getReg();
return (Rt == Rn) ? 3 : 2;
}
case ARM::t2STRB_POST:
case ARM::t2STRB_PRE:
case ARM::t2STRBs:
case ARM::t2STRDi8:
case ARM::t2STRH_POST:
case ARM::t2STRH_PRE:
case ARM::t2STRHs:
case ARM::t2STR_POST:
case ARM::t2STR_PRE:
case ARM::t2STRs:
return 2;
}
}
// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined, return 0 (missing MachineMemOperand).
//
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access. Instead, we expect
// the target to provide this information based on the instruction opcode and
// operands. However, using MachineMemOperand is the best solution for now,
// for two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
// operands. This is much more dangerous than using the MachineMemOperand
// sizes because CodeGen passes can insert/remove optional machine operands. In
// fact, it's totally incorrect for preRA passes and appears to be wrong for
// postRA passes as well.
//
// 2) getNumLDMAddresses is only used by the scheduling machine model and any
// machine model that calls this should handle the unknown (zero size) case.
//
// Long term, we should require a target hook that verifies MachineMemOperand
// sizes during MC lowering. That target hook should be local to MC lowering
// because we can't ensure that it is aware of other MI forms. Doing this will
// ensure that MachineMemOperands are correctly propagated through all passes.
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
unsigned Size = 0;
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
Size += (*I)->getSize();
}
return Size / 4;
}
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
if (!ItinData || ItinData->isEmpty())
return 1;
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
int ItinUOps = ItinData->getNumMicroOps(Class);
if (ItinUOps >= 0) {
if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
return getNumMicroOpsSwiftLdSt(ItinData, MI);
return ItinUOps;
}
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
llvm_unreachable("Unexpected multi-uops instruction!");
case ARM::VLDMQIA:
case ARM::VSTMQIA:
return 2;
// The number of uOps for a load / store multiple is determined by the number
// of registers.
//
// On Cortex-A8, each pair of register loads / stores can be scheduled on the
// same cycle. The scheduling for the first load / store must be done
// separately by assuming the address is not 64-bit aligned.
//
// On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
// is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
// load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
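// For example (illustrative): a VLDMDIA of 5 D-registers on an A9-class
// core costs (5 / 2) + (5 % 2) + 1 == 4 micro-ops.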
case ARM::VLDMDIA:
case ARM::VLDMDIA_UPD:
case ARM::VLDMDDB_UPD:
case ARM::VLDMSIA:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
case ARM::VSTMDIA:
case ARM::VSTMDIA_UPD:
case ARM::VSTMDDB_UPD:
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
return (NumRegs / 2) + (NumRegs % 2) + 1;
}
case ARM::LDMIA_RET:
case ARM::LDMIA:
case ARM::LDMDA:
case ARM::LDMDB:
case ARM::LDMIB:
case ARM::LDMIA_UPD:
case ARM::LDMDA_UPD:
case ARM::LDMDB_UPD:
case ARM::LDMIB_UPD:
case ARM::STMIA:
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
case ARM::STMIA_UPD:
case ARM::STMDA_UPD:
case ARM::STMDB_UPD:
case ARM::STMIB_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
case ARM::tPUSH:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
case ARM::t2STMIA:
case ARM::t2STMDB:
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
if (Subtarget.isSwift()) {
int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
switch (Opc) {
default: break;
case ARM::VLDMDIA_UPD:
case ARM::VLDMDDB_UPD:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
case ARM::VSTMDIA_UPD:
case ARM::VSTMDDB_UPD:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD:
case ARM::LDMIA_UPD:
case ARM::LDMDA_UPD:
case ARM::LDMDB_UPD:
case ARM::LDMIB_UPD:
case ARM::STMIA_UPD:
case ARM::STMDA_UPD:
case ARM::STMDB_UPD:
case ARM::STMIB_UPD:
case ARM::tLDMIA_UPD:
case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD:
++UOps; // One for base register writeback.
break;
case ARM::LDMIA_RET:
case ARM::tPOP_RET:
case ARM::t2LDMIA_RET:
UOps += 2; // One for base reg wb, one for write to pc.
break;
}
return UOps;
} else if (Subtarget.isCortexA8()) {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
int A8UOps = (NumRegs / 2);
if (NumRegs % 2)
++A8UOps;
return A8UOps;
} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
int A9UOps = (NumRegs / 2);
// If there is an odd number of registers, or the address is not 64-bit
// aligned, then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) ||
!MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
++A9UOps;
return A9UOps;
} else {
// Assume the worst.
return NumRegs;
}
}
}
}
int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
int DefCycle;
if (Subtarget.isCortexA8()) {
// (regno / 2) + (regno % 2) + 1
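// e.g. (illustrative) RegNo == 3 gives 3 / 2 + 1 == 2, plus one more
// for the odd register, for a DefCycle of 3.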
DefCycle = RegNo / 2 + 1;
if (RegNo % 2)
++DefCycle;
} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = RegNo;
bool isSLoad = false;
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLDMSIA:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
isSLoad = true;
break;
}
// If there is an odd number of 'S' registers, or the address is not 64-bit
// aligned, then it takes an extra cycle.
if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
++DefCycle;
} else {
// Assume the worst.
DefCycle = RegNo + 2;
}
return DefCycle;
}
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
int DefCycle;
if (Subtarget.isCortexA8()) {
// 4 registers would be issued: 1, 2, 1.
// 5 registers would be issued: 1, 2, 2.
DefCycle = RegNo / 2;
if (DefCycle < 1)
DefCycle = 1;
// Result latency is issue cycle + 2: E2.
DefCycle += 2;
} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = (RegNo / 2);
// If there is an odd number of registers, or the address is not 64-bit
// aligned, then it takes an extra AGU (Address Generation Unit) cycle.
if ((RegNo % 2) || DefAlign < 8)
++DefCycle;
// Result latency is AGU cycles + 2.
DefCycle += 2;
} else {
// Assume the worst.
DefCycle = RegNo + 2;
}
return DefCycle;
}
int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
int UseCycle;
if (Subtarget.isCortexA8()) {
// (regno / 2) + (regno % 2) + 1
UseCycle = RegNo / 2 + 1;
if (RegNo % 2)
++UseCycle;
} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = RegNo;
bool isSStore = false;
switch (UseMCID.getOpcode()) {
default: break;
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD:
isSStore = true;
break;
}
// If there is an odd number of 'S' registers, or the address is not 64-bit
// aligned, then it takes an extra cycle.
if ((isSStore && (RegNo % 2)) || UseAlign < 8)
++UseCycle;
} else {
// Assume the worst.
UseCycle = RegNo + 2;
}
return UseCycle;
}
int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
int UseCycle;
if (Subtarget.isCortexA8()) {
UseCycle = RegNo / 2;
if (UseCycle < 2)
UseCycle = 2;
// Read in E3.
UseCycle += 2;
} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = (RegNo / 2);
// If there is an odd number of registers, or the address is not 64-bit
// aligned, then it takes an extra AGU (Address Generation Unit) cycle.
if ((RegNo % 2) || UseAlign < 8)
++UseCycle;
} else {
// Assume the worst.
UseCycle = 1;
}
return UseCycle;
}
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const {
unsigned DefClass = DefMCID.getSchedClass();
unsigned UseClass = UseMCID.getSchedClass();
if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
// This may be a def / use of a variable_ops instruction; the operand
// latency might still be determinable dynamically. Let the target try to
// figure it out.
int DefCycle = -1;
bool LdmBypass = false;
switch (DefMCID.getOpcode()) {
default:
DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
break;
case ARM::VLDMDIA:
case ARM::VLDMDIA_UPD:
case ARM::VLDMDDB_UPD:
case ARM::VLDMSIA:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
case ARM::LDMIA_RET:
case ARM::LDMIA:
case ARM::LDMDA:
case ARM::LDMDB:
case ARM::LDMIB:
case ARM::LDMIA_UPD:
case ARM::LDMDA_UPD:
case ARM::LDMDB_UPD:
case ARM::LDMIB_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
case ARM::tPUSH:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
LdmBypass = 1;
DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
}
if (DefCycle == -1)
// We can't seem to determine the result latency of the def, assume it's 2.
DefCycle = 2;
int UseCycle = -1;
switch (UseMCID.getOpcode()) {
default:
UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
break;
case ARM::VSTMDIA:
case ARM::VSTMDIA_UPD:
case ARM::VSTMDDB_UPD:
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD:
UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
break;
case ARM::STMIA:
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
case ARM::STMIA_UPD:
case ARM::STMDA_UPD:
case ARM::STMDB_UPD:
case ARM::STMIB_UPD:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
case ARM::t2STMIA:
case ARM::t2STMDB:
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD:
UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
break;
}
if (UseCycle == -1)
// Assume it's read in the first stage.
UseCycle = 1;
UseCycle = DefCycle - UseCycle + 1;
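// The def result becomes available in stage DefCycle and the use reads its
// operand in stage UseCycle, so the operand latency is the difference plus
// one. Illustration: DefCycle = 4 and UseCycle = 2 give a latency of 3,
// reduced by one below if the itinerary allows pipeline forwarding.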
if (UseCycle > 0) {
if (LdmBypass) {
// It's a variable_ops instruction so we can't use DefIdx here. Just use
// first def operand.
if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
UseClass, UseIdx))
--UseCycle;
} else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
UseClass, UseIdx)) {
--UseCycle;
}
}
return UseCycle;
}
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
const MachineInstr *MI, unsigned Reg,
unsigned &DefIdx, unsigned &Dist) {
Dist = 0;
MachineBasicBlock::const_iterator I = MI; ++I;
MachineBasicBlock::const_instr_iterator II =
llvm::prior(I.getInstrIterator());
assert(II->isInsideBundle() && "Empty bundle?");
int Idx = -1;
while (II->isInsideBundle()) {
Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
if (Idx != -1)
break;
--II;
++Dist;
}
assert(Idx != -1 && "Cannot find bundled definition!");
DefIdx = Idx;
return II;
}
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
const MachineInstr *MI, unsigned Reg,
unsigned &UseIdx, unsigned &Dist) {
Dist = 0;
MachineBasicBlock::const_instr_iterator II = MI; ++II;
assert(II->isInsideBundle() && "Empty bundle?");
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
// FIXME: This doesn't properly handle multiple uses.
int Idx = -1;
while (II != E && II->isInsideBundle()) {
Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
if (Idx != -1)
break;
if (II->getOpcode() != ARM::t2IT)
++Dist;
++II;
}
if (Idx == -1) {
Dist = 0;
return 0;
}
UseIdx = Idx;
return II;
}
/// Return the number of cycles to add to (or subtract from) the static
/// itinerary based on the def opcode and alignment. The caller will ensure that
/// adjusted latency is at least one cycle.
static int adjustDefLatency(const ARMSubtarget &Subtarget,
const MachineInstr *DefMI,
const MCInstrDesc *DefMCID, unsigned DefAlign) {
int Adjust = 0;
if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
unsigned ShOpVal = DefMI->getOperand(3).getImm();
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
--Adjust;
break;
}
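// Illustration (assembly forms, added for clarity): on A8/A9 both
// "ldr r0, [r1, r2]" and "ldr r0, [r1, r2, lsl #2]" get Adjust = -1,
// while "ldr r0, [r1, r2, lsl #1]" keeps the full itinerary latency.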
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl only.
unsigned ShAmt = DefMI->getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 2)
--Adjust;
break;
}
}
} else if (Subtarget.isSwift()) {
// FIXME: Properly handle all of the latency adjustments for address
// writeback.
switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
unsigned ShOpVal = DefMI->getOperand(3).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
(ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
Adjust -= 2;
else if (!isSub &&
ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
--Adjust;
break;
}
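// Illustration (added for clarity): on Swift "ldr r0, [r1, r2]" gets
// Adjust -= 2, "ldr r0, [r1, r2, lsr #1]" gets --Adjust, and subtracting
// forms such as "ldr r0, [r1, -r2]" receive no adjustment here.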
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl only.
unsigned ShAmt = DefMI->getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
Adjust -= 2;
break;
}
}
}
if (DefAlign < 8 && Subtarget.isLikeA9()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
case ARM::VLD1q8wb_fixed:
case ARM::VLD1q16wb_fixed:
case ARM::VLD1q32wb_fixed:
case ARM::VLD1q64wb_fixed:
case ARM::VLD1q8wb_register:
case ARM::VLD1q16wb_register:
case ARM::VLD1q32wb_register:
case ARM::VLD1q64wb_register:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2q8wb_fixed:
case ARM::VLD2q16wb_fixed:
case ARM::VLD2q32wb_fixed:
case ARM::VLD2d8wb_register:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_register:
case ARM::VLD2q8wb_register:
case ARM::VLD2q16wb_register:
case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
case ARM::VLD1d64T:
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
case ARM::VLD1d64Twb_fixed:
case ARM::VLD1d64Twb_register:
case ARM::VLD3q8_UPD:
case ARM::VLD3q16_UPD:
case ARM::VLD3q32_UPD:
case ARM::VLD4d8:
case ARM::VLD4d16:
case ARM::VLD4d32:
case ARM::VLD1d64Q:
case ARM::VLD4d8_UPD:
case ARM::VLD4d16_UPD:
case ARM::VLD4d32_UPD:
case ARM::VLD1d64Qwb_fixed:
case ARM::VLD1d64Qwb_register:
case ARM::VLD4q8_UPD:
case ARM::VLD4q16_UPD:
case ARM::VLD4q32_UPD:
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
case ARM::VLD1DUPq8wb_fixed:
case ARM::VLD1DUPq16wb_fixed:
case ARM::VLD1DUPq32wb_fixed:
case ARM::VLD1DUPq8wb_register:
case ARM::VLD1DUPq16wb_register:
case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
case ARM::VLD2DUPd8wb_fixed:
case ARM::VLD2DUPd16wb_fixed:
case ARM::VLD2DUPd32wb_fixed:
case ARM::VLD2DUPd8wb_register:
case ARM::VLD2DUPd16wb_register:
case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8:
case ARM::VLD4DUPd16:
case ARM::VLD4DUPd32:
case ARM::VLD4DUPd8_UPD:
case ARM::VLD4DUPd16_UPD:
case ARM::VLD4DUPd32_UPD:
case ARM::VLD1LNd8:
case ARM::VLD1LNd16:
case ARM::VLD1LNd32:
case ARM::VLD1LNd8_UPD:
case ARM::VLD1LNd16_UPD:
case ARM::VLD1LNd32_UPD:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
case ARM::VLD2LNq16:
case ARM::VLD2LNq32:
case ARM::VLD2LNd8_UPD:
case ARM::VLD2LNd16_UPD:
case ARM::VLD2LNd32_UPD:
case ARM::VLD2LNq16_UPD:
case ARM::VLD2LNq32_UPD:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
case ARM::VLD4LNq16:
case ARM::VLD4LNq32:
case ARM::VLD4LNd8_UPD:
case ARM::VLD4LNd16_UPD:
case ARM::VLD4LNd32_UPD:
case ARM::VLD4LNq16_UPD:
case ARM::VLD4LNq32_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increase by one.
++Adjust;
break;
}
}
return Adjust;
}
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
unsigned UseIdx) const {
// No operand latency. The caller may fall back to getInstrLatency.
if (!ItinData || ItinData->isEmpty())
return -1;
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
const MCInstrDesc *DefMCID = &DefMI->getDesc();
const MCInstrDesc *UseMCID = &UseMI->getDesc();
unsigned DefAdj = 0;
if (DefMI->isBundle()) {
DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
DefMCID = &DefMI->getDesc();
}
if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
DefMI->isRegSequence() || DefMI->isImplicitDef()) {
return 1;
}
unsigned UseAdj = 0;
if (UseMI->isBundle()) {
unsigned NewUseIdx;
const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
Reg, NewUseIdx, UseAdj);
if (!NewUseMI)
return -1;
UseMI = NewUseMI;
UseIdx = NewUseIdx;
UseMCID = &UseMI->getDesc();
}
if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isLikeA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
if (UseMI->isBranch())
return 0;
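// Illustration: in "subs r0, r0, #1; bne .Lloop" the flag-setting subs and
// the dependent branch can issue in the same cycle, hence a latency of 0.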
// Otherwise it takes the instruction latency (generally one).
unsigned Latency = getInstrLatency(ItinData, DefMI);
// For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
// its uses. Instructions which are otherwise scheduled between them may
// incur a code size penalty (they cannot use the CPSR-setting 16-bit
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
if (MF->getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize))
--Latency;
}
return Latency;
}
if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
return -1;
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
// Get the itinerary's latency if possible, and handle variable_ops.
int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
*UseMCID, UseIdx, UseAlign);
// Unable to find operand latency. The caller may resort to getInstrLatency.
if (Latency < 0)
return Latency;
// Adjust for IT block position.
int Adj = DefAdj + UseAdj;
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
if (Adj >= 0 || (int)Latency > -Adj) {
return Latency + Adj;
}
// Return the itinerary latency, which may be zero but not less than zero.
return Latency;
}
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const {
if (!DefNode->isMachineOpcode())
return 1;
const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
if (isZeroCost(DefMCID.Opcode))
return 0;
if (!ItinData || ItinData->isEmpty())
return DefMCID.mayLoad() ? 3 : 1;
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
if (Subtarget.isLikeA9() || Subtarget.isSwift())
return Latency <= 2 ? 1 : Latency - 1;
else
return Latency <= 3 ? 1 : Latency - 2;
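// Illustration of the clamping above: an itinerary latency of 4 becomes 3
// on A9-like cores and Swift, and 2 elsewhere; small latencies collapse
// to 1 for the non-machine user.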
}
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
unsigned DefAlign = !DefMN->memoperands_empty()
? (*DefMN->memoperands_begin())->getAlignment() : 0;
const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
unsigned UseAlign = !UseMN->memoperands_empty()
? (*UseMN->memoperands_begin())->getAlignment() : 0;
int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
UseMCID, UseIdx, UseAlign);
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
unsigned ShOpVal =
cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
--Latency;
break;
}
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl only.
unsigned ShAmt =
cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
if (ShAmt == 0 || ShAmt == 2)
--Latency;
break;
}
}
} else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
// FIXME: Properly handle all of the latency adjustments for address
// writeback.
switch (DefMCID.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
unsigned ShOpVal =
cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
Latency -= 2;
else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
--Latency;
break;
}
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl 0-3 only.
Latency -= 2;
break;
}
}
}
if (DefAlign < 8 && Subtarget.isLikeA9())
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
case ARM::VLD1q8wb_register:
case ARM::VLD1q16wb_register:
case ARM::VLD1q32wb_register:
case ARM::VLD1q64wb_register:
case ARM::VLD1q8wb_fixed:
case ARM::VLD1q16wb_fixed:
case ARM::VLD1q32wb_fixed:
case ARM::VLD1q64wb_fixed:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2q8PseudoWB_fixed:
case ARM::VLD2q16PseudoWB_fixed:
case ARM::VLD2q32PseudoWB_fixed:
case ARM::VLD2d8wb_register:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_register:
case ARM::VLD2q8PseudoWB_register:
case ARM::VLD2q16PseudoWB_register:
case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64TPseudoWB_fixed:
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
case ARM::VLD3q8oddPseudo:
case ARM::VLD3q16oddPseudo:
case ARM::VLD3q32oddPseudo:
case ARM::VLD3q8oddPseudo_UPD:
case ARM::VLD3q16oddPseudo_UPD:
case ARM::VLD3q32oddPseudo_UPD:
case ARM::VLD4d8Pseudo:
case ARM::VLD4d16Pseudo:
case ARM::VLD4d32Pseudo:
case ARM::VLD1d64QPseudo:
+ case ARM::VLD1d64QPseudoWB_fixed:
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
case ARM::VLD4q8oddPseudo:
case ARM::VLD4q16oddPseudo:
case ARM::VLD4q32oddPseudo:
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
case ARM::VLD1DUPq8wb_fixed:
case ARM::VLD1DUPq16wb_fixed:
case ARM::VLD1DUPq32wb_fixed:
case ARM::VLD1DUPq8wb_register:
case ARM::VLD1DUPq16wb_register:
case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
case ARM::VLD2DUPd8wb_fixed:
case ARM::VLD2DUPd16wb_fixed:
case ARM::VLD2DUPd32wb_fixed:
case ARM::VLD2DUPd8wb_register:
case ARM::VLD2DUPd16wb_register:
case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
case ARM::VLD4DUPd8Pseudo_UPD:
case ARM::VLD4DUPd16Pseudo_UPD:
case ARM::VLD4DUPd32Pseudo_UPD:
case ARM::VLD1LNq8Pseudo:
case ARM::VLD1LNq16Pseudo:
case ARM::VLD1LNq32Pseudo:
case ARM::VLD1LNq8Pseudo_UPD:
case ARM::VLD1LNq16Pseudo_UPD:
case ARM::VLD1LNq32Pseudo_UPD:
case ARM::VLD2LNd8Pseudo:
case ARM::VLD2LNd16Pseudo:
case ARM::VLD2LNd32Pseudo:
case ARM::VLD2LNq16Pseudo:
case ARM::VLD2LNq32Pseudo:
case ARM::VLD2LNd8Pseudo_UPD:
case ARM::VLD2LNd16Pseudo_UPD:
case ARM::VLD2LNd32Pseudo_UPD:
case ARM::VLD2LNq16Pseudo_UPD:
case ARM::VLD2LNq32Pseudo_UPD:
case ARM::VLD4LNd8Pseudo:
case ARM::VLD4LNd16Pseudo:
case ARM::VLD4LNd32Pseudo:
case ARM::VLD4LNq16Pseudo:
case ARM::VLD4LNq32Pseudo:
case ARM::VLD4LNd8Pseudo_UPD:
case ARM::VLD4LNd16Pseudo_UPD:
case ARM::VLD4LNd32Pseudo_UPD:
case ARM::VLD4LNq16Pseudo_UPD:
case ARM::VLD4LNq32Pseudo_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increase by one.
++Latency;
break;
}
return Latency;
}
unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
if (MI->isCopyLike() || MI->isInsertSubreg() ||
MI->isRegSequence() || MI->isImplicitDef())
return 0;
if (MI->isBundle())
return 0;
const MCInstrDesc &MCID = MI->getDesc();
if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
// When predicated, CPSR is an additional source operand for CPSR-updating
// instructions; this apparently increases their latencies.
return 1;
}
return 0;
}
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
if (MI->isCopyLike() || MI->isInsertSubreg() ||
MI->isRegSequence() || MI->isImplicitDef())
return 1;
// An instruction scheduler typically runs on unbundled instructions;
// however, other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
unsigned Latency = 0;
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
if (I->getOpcode() != ARM::t2IT)
Latency += getInstrLatency(ItinData, I, PredCost);
}
return Latency;
}
const MCInstrDesc &MCID = MI->getDesc();
if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
// When predicated, CPSR is an additional source operand for CPSR-updating
// instructions; this apparently increases their latencies.
*PredCost = 1;
}
// Be sure to call getStageLatency for an empty itinerary in case it has a
// valid MinLatency property.
if (!ItinData)
return MI->mayLoad() ? 3 : 1;
unsigned Class = MCID.getSchedClass();
// For instructions with variable uops, use uops as latency.
if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
return getNumMicroOps(ItinData, MI);
// For the common case, fall back on the itinerary's latency.
unsigned Latency = ItinData->getStageLatency(Class);
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
unsigned DefAlign = MI->hasOneMemOperand()
? (*MI->memoperands_begin())->getAlignment() : 0;
int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
if (Adj >= 0 || (int)Latency > -Adj) {
return Latency + Adj;
}
return Latency;
}
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const {
if (!Node->isMachineOpcode())
return 1;
if (!ItinData || ItinData->isEmpty())
return 1;
unsigned Opcode = Node->getMachineOpcode();
switch (Opcode) {
default:
return ItinData->getStageLatency(get(Opcode).getSchedClass());
case ARM::VLDMQIA:
case ARM::VSTMQIA:
return 2;
}
}
bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
if (Subtarget.isCortexA8() &&
(DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
// CortexA8 VFP instructions are not pipelined.
return true;
// Hoist VFP / NEON instructions with a latency of 4 or higher.
int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
if (Latency < 0)
Latency = getInstrLatency(ItinData, DefMI);
if (Latency <= 3)
return false;
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}
bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx) const {
if (!ItinData || ItinData->isEmpty())
return false;
unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
if (DDomain == ARMII::DomainGeneral) {
unsigned DefClass = DefMI->getDesc().getSchedClass();
int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
return (DefCycle != -1 && DefCycle <= 2);
}
return false;
}
bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
if (convertAddSubFlagsOpcode(MI->getOpcode())) {
ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
return false;
}
return true;
}
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
unsigned &AddSubOpc,
bool &NegAcc, bool &HasLane) const {
DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
if (I == MLxEntryMap.end())
return false;
const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
MulOpc = Entry.MulOpc;
AddSubOpc = Entry.AddSubOpc;
NegAcc = Entry.NegAcc;
HasLane = Entry.HasLane;
return true;
}
//===----------------------------------------------------------------------===//
// Execution domains.
//===----------------------------------------------------------------------===//
//
// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
// and some can go down both. The vmov instructions go down the VFP pipeline,
// but they can be changed to vorr equivalents that are executed by the NEON
// pipeline.
//
// We use the following execution domain numbering:
//
enum ARMExeDomain {
ExeGeneric = 0,
ExeVFP = 1,
ExeNEON = 2
};
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
// if they are not predicated.
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// CortexA9 is particularly picky about mixing the two and wants these
// converted.
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR ||
MI->getOpcode() == ARM::VMOVS))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
if (Domain & ARMII::DomainNEON)
return std::make_pair(ExeNEON, 0);
// Certain instructions can go either way on Cortex-A8.
// Treat them as NEON instructions.
if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
return std::make_pair(ExeNEON, 0);
if (Domain & ARMII::DomainVFP)
return std::make_pair(ExeVFP, 0);
return std::make_pair(ExeGeneric, 0);
}
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
unsigned SReg, unsigned &Lane) {
unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
Lane = 0;
if (DReg != ARM::NoRegister)
return DReg;
Lane = 1;
DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
assert(DReg && "S-register with no D super-register?");
return DReg;
}
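// Illustration (added): getCorrespondingDRegAndLane(TRI, ARM::S4, Lane)
// returns ARM::D2 with Lane == 0; ARM::S5 also yields ARM::D2 but with
// Lane == 1, since odd S-registers fail the ssub_0 probe and match ssub_1.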
/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
/// set ImplicitSReg to the register number that must be marked as an
/// implicit use, or to zero if no implicit use needs to be added.
///
/// If the function cannot determine whether an SPR should be marked as an
/// implicit use, it returns false.
///
/// This function handles cases where an instruction is being modified from
/// taking an SPR to taking a DPR[Lane]. A use of the DPR is being added,
/// which may conflict with an earlier def of an SPR corresponding to
/// DPR[Lane^1] (i.e. the other lane of the DPR).
///
/// If the other SPR is defined, an implicit use of it should be added;
/// otherwise (including the case where the DPR itself is defined), it
/// should not.
///
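/// For illustration (hypothetical registers): rewriting a use of %S1 into a
/// use of D0[1] may conflict with an earlier def of %S0, i.e. D0[0]; if %S0
/// is live at MI, ImplicitSReg is set to %S0 so the caller can add it as an
/// implicit use.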
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
MachineInstr *MI,
unsigned DReg, unsigned Lane,
unsigned &ImplicitSReg) {
// If the DPR is defined or used already, the other SPR lane will be chained
// correctly, so there is nothing to be done.
if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
ImplicitSReg = 0;
return true;
}
// Otherwise we need to go searching to see if the SPR is set explicitly.
ImplicitSReg = TRI->getSubReg(DReg,
(Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
MachineBasicBlock::LivenessQueryResult LQR =
MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
if (LQR == MachineBasicBlock::LQR_Live)
return true;
else if (LQR == MachineBasicBlock::LQR_Unknown)
return false;
// If the register is known not to be live, there is no need to add an
// implicit-use.
ImplicitSReg = 0;
return true;
}
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
switch (MI->getOpcode()) {
default:
llvm_unreachable("cannot handle opcode!");
break;
case ARM::VMOVD:
if (Domain != ExeNEON)
break;
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
// Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
MI->RemoveOperand(i-1);
// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
MI->setDesc(get(ARM::VORRd));
AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
.addReg(SrcReg)
.addReg(SrcReg));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
// Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
MI->RemoveOperand(i-1);
DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
// Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
// Note that DSrc has been widened and the other lane may be undef, which
// contaminates the entire register.
MI->setDesc(get(ARM::VGETLNi32));
AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
.addReg(DReg, RegState::Undef)
.addImm(Lane));
// The old source should be an implicit use, otherwise we might think it
// was dead before here.
MIB.addReg(SrcReg, RegState::Implicit);
break;
case ARM::VMOVSR: {
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
// Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
unsigned ImplicitSReg;
if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
break;
for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
MI->RemoveOperand(i-1);
// Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
// Again DDst may be undefined at the beginning of this instruction.
MI->setDesc(get(ARM::VSETLNi32));
MIB.addReg(DReg, RegState::Define)
.addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
.addReg(SrcReg)
.addImm(Lane);
AddDefaultPred(MIB);
// The narrower destination must be marked as set to keep previous chains
// in place.
MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
if (ImplicitSReg != 0)
MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
}
case ARM::VMOVS: {
if (Domain != ExeNEON)
break;
// Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
unsigned ImplicitSReg;
if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
break;
for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
MI->RemoveOperand(i-1);
if (DSrc == DDst) {
// Destination can be:
// %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
MI->setDesc(get(ARM::VDUPLN32d));
MIB.addReg(DDst, RegState::Define)
.addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
.addImm(SrcLane);
AddDefaultPred(MIB);
// Neither the source nor the destination is naturally represented any
// more, so add them in manually.
MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
MIB.addReg(SrcReg, RegState::Implicit);
if (ImplicitSReg != 0)
MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
}
// In general there's no single instruction that can perform an S <-> S
// move in NEON space, but a pair of VEXT instructions *can* do the
// job. It turns out that the VEXTs needed will only use DSrc once, with
// the position based purely on the combination of lane-0 and lane-1
// involved. For example
// vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
// vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
// vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
// vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
//
// Pattern of the MachineInstrs is:
// %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
MachineInstrBuilder NewMIB;
NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
get(ARM::VEXTd32), DDst);
// On the first instruction, both DSrc and DDst may be <undef> if present,
// specifically when the original instruction didn't have them as an
// <imp-use>.
unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
bool CurUndef = !MI->readsRegister(CurReg, TRI);
NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
CurUndef = !MI->readsRegister(CurReg, TRI);
NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
NewMIB.addImm(1);
AddDefaultPred(NewMIB);
if (SrcLane == DstLane)
NewMIB.addReg(SrcReg, RegState::Implicit);
MI->setDesc(get(ARM::VEXTd32));
MIB.addReg(DDst, RegState::Define);
// On the second instruction, DDst has definitely been defined above, so
// it is not <undef>. DSrc, if present, can be <undef> as above.
CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
MIB.addReg(CurReg, getUndefRegState(CurUndef));
CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
MIB.addReg(CurReg, getUndefRegState(CurUndef));
MIB.addImm(1);
AddDefaultPred(MIB);
if (SrcLane != DstLane)
MIB.addReg(SrcReg, RegState::Implicit);
// As before, the original destination is no longer represented, add it
// implicitly.
MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
if (ImplicitSReg != 0)
MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
}
}
}
//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity. That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
unsigned OpNum,
const TargetRegisterInfo *TRI) const {
if (!SwiftPartialUpdateClearance ||
!(Subtarget.isSwift() || Subtarget.isCortexA15()))
return 0;
assert(TRI && "Need TRI instance");
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.readsReg())
return 0;
unsigned Reg = MO.getReg();
int UseOp = -1;
switch(MI->getOpcode()) {
// Normal instructions writing only an S-register.
case ARM::VLDRS:
case ARM::FCONSTS:
case ARM::VMOVSR:
case ARM::VMOVv8i8:
case ARM::VMOVv4i16:
case ARM::VMOVv2i32:
case ARM::VMOVv2f32:
case ARM::VMOVv1i64:
UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
break;
// Explicitly reads the dependency.
case ARM::VLD1LNd32:
UseOp = 3;
break;
default:
return 0;
}
// If this instruction actually reads a value from Reg, there is no unwanted
// dependency.
if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
return 0;
// We must be able to clobber the whole D-reg.
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
// Virtual register must be a foo:ssub_0<def,undef> operand.
if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
return 0;
} else if (ARM::SPRRegClass.contains(Reg)) {
// Physical register: MI must define the full D-reg.
unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
&ARM::DPRRegClass);
if (!DReg || !MI->definesRegister(DReg, TRI))
return 0;
}
// MI has an unwanted D-register dependency.
// Avoid defs in the previous N instructions.
return SwiftPartialUpdateClearance;
}
// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
void ARMBaseInstrInfo::
breakPartialRegDependency(MachineBasicBlock::iterator MI,
unsigned OpNum,
const TargetRegisterInfo *TRI) const {
assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
assert(TRI && "Need TRI instance");
const MachineOperand &MO = MI->getOperand(OpNum);
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"Can't break virtual register dependencies.");
unsigned DReg = Reg;
// If MI defines an S-reg, find the corresponding D super-register.
if (ARM::SPRRegClass.contains(Reg)) {
DReg = ARM::D0 + (Reg - ARM::S0) / 2;
assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
}
assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
// FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
// the full D-register by loading the same value to both lanes. The
// instruction is micro-coded with 2 uops, so don't do this until we can
// properly schedule micro-coded instructions. The dispatcher stalls cause
// overly large regressions.
// Insert the dependency-breaking FCONSTD before MI.
// 96 is the encoding of 0.5, but the actual value doesn't matter here.
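// (Illustration: FCONSTD is VMOV.F64 with an 8-bit floating-point
// immediate; imm 96 (0x60) decodes to +0.5, but any encodable constant
// would serve, since the instruction exists only to write the whole
// D-register.)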
AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
get(ARM::FCONSTD), DReg).addImm(96));
MI->addRegisterKilled(DReg, TRI, true);
}
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
if (MI->getNumOperands() < 4)
return true;
unsigned ShOpVal = MI->getOperand(3).getImm();
unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
// Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
((ShImm == 1 || ShImm == 2) &&
ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
return true;
return false;
}
Index: head/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
===================================================================
--- head/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (revision 265925)
@@ -1,1280 +1,1284 @@
//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, and other late
// optimizations. This pass should be run after register allocation but before
// the post-regalloc scheduling pass.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-pseudo"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
cl::desc("Verify machine code after expanding ARM pseudos"));
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
public:
static char ID;
ARMExpandPseudo() : MachineFunctionPass(ID) {}
const ARMBaseInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM pseudo instruction expansion pass";
}
private:
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool ExpandMBB(MachineBasicBlock &MBB);
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
}
/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
/// the instructions created from the expansion.
void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
const MCInstrDesc &Desc = OldMI.getDesc();
for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
i != e; ++i) {
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
UseMI.addOperand(MO);
else
DefMI.addOperand(MO);
}
}
namespace {
// Constants for register spacing in NEON load/store instructions.
// For quad-register load-lane and store-lane pseudo instructions, the
// spacing is initially assumed to be EvenDblSpc, and that is changed to
// OddDblSpc depending on the lane number operand.
enum NEONRegSpacing {
SingleSpc,
EvenDblSpc,
OddDblSpc
};
// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
uint16_t PseudoOpc;
uint16_t RealOpc;
bool IsLoad;
bool isUpdating;
bool hasWritebackOperand;
uint8_t RegSpacing; // One of type NEONRegSpacing
uint8_t NumRegs; // D registers loaded or stored
uint8_t RegElts; // elements per D register; used for lane ops
// FIXME: Temporary flag to denote whether the real instruction takes
// a single register (like the encoding) or all of the registers in
// the list (like the asm syntax and the isel DAG). When all definitions
// are converted to take only the single encoded register, this will
// go away.
bool copyAllListRegs;
// Comparison methods for binary search of the table.
bool operator<(const NEONLdStTableEntry &TE) const {
return PseudoOpc < TE.PseudoOpc;
}
friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
return TE.PseudoOpc < PseudoOpc;
}
friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
const NEONLdStTableEntry &TE) {
return PseudoOpc < TE.PseudoOpc;
}
};
}
static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
};
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
const unsigned NumEntries = array_lengthof(NEONLdStTable);
#ifndef NDEBUG
// Make sure the table is sorted.
static bool TableChecked = false;
if (!TableChecked) {
for (unsigned i = 0; i != NumEntries-1; ++i)
assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
"NEONLdStTable is not sorted!");
TableChecked = true;
}
#endif
const NEONLdStTableEntry *I =
std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
return I;
return NULL;
}
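// Illustration (hypothetical caller, added for clarity):
//   const NEONLdStTableEntry *E = LookupNEONLdSt(ARM::VLD2q8Pseudo);
//   // E->RealOpc == ARM::VLD2q8, E->NumRegs == 4, E->RegElts == 8,
//   // E->IsLoad is true and E->copyAllListRegs is false.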
/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
/// corresponding to the specified register spacing. Not all of the results
/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
const TargetRegisterInfo *TRI, unsigned &D0,
unsigned &D1, unsigned &D2, unsigned &D3) {
if (RegSpc == SingleSpc) {
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
D1 = TRI->getSubReg(Reg, ARM::dsub_1);
D2 = TRI->getSubReg(Reg, ARM::dsub_2);
D3 = TRI->getSubReg(Reg, ARM::dsub_3);
} else if (RegSpc == EvenDblSpc) {
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
D1 = TRI->getSubReg(Reg, ARM::dsub_2);
D2 = TRI->getSubReg(Reg, ARM::dsub_4);
D3 = TRI->getSubReg(Reg, ARM::dsub_6);
} else {
assert(RegSpc == OddDblSpc && "unknown register spacing");
D0 = TRI->getSubReg(Reg, ARM::dsub_1);
D1 = TRI->getSubReg(Reg, ARM::dsub_3);
D2 = TRI->getSubReg(Reg, ARM::dsub_5);
D3 = TRI->getSubReg(Reg, ARM::dsub_7);
}
}
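// Illustration (added): for a QQQQ register spanning D0-D7, SingleSpc
// yields D0,D1,D2,D3; EvenDblSpc yields D0,D2,D4,D6; OddDblSpc yields
// D1,D3,D5,D7. For a narrower Q register the later probes return 0
// (no such sub-register) and the caller simply ignores them.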
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
/// operands to real VLD instructions with D register operands.
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
bool DstIsDead = MI.getOperand(OpIdx).isDead();
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 1 && TableEntry->copyAllListRegs)
MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
// has an extra operand that is a use of the super-register. Record the
// operand index and skip over it.
unsigned SrcOpIdx = 0;
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
SrcOpIdx = OpIdx++;
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the super-register source operand used for double-spaced subregs over
// to the new instruction as an implicit operand.
if (SrcOpIdx != 0) {
MachineOperand MO = MI.getOperand(SrcOpIdx);
MO.setImplicit(true);
MIB.addOperand(MO);
}
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
}
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, getUndefRegState(SrcIsUndef));
if (NumRegs > 1 && TableEntry->copyAllListRegs)
MIB.addReg(D1, getUndefRegState(SrcIsUndef));
if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2, getUndefRegState(SrcIsUndef));
if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
}
/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && "NEONLdStTable lookup failed");
NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
unsigned RegElts = TableEntry->RegElts;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
// The lane operand is always the 3rd from last operand, before the 2
// predicate operands.
unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
// Adjust the lane and spacing as needed for Q registers.
assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
if (RegSpc == EvenDblSpc && Lane >= RegElts) {
RegSpc = OddDblSpc;
Lane -= RegElts;
}
assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
unsigned DstReg = 0;
bool DstIsDead = false;
if (TableEntry->IsLoad) {
DstIsDead = MI.getOperand(OpIdx).isDead();
DstReg = MI.getOperand(OpIdx++).getReg();
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 1)
MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 2)
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 3)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
}
if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// Grab the super-register source.
MachineOperand MO = MI.getOperand(OpIdx++);
if (!TableEntry->IsLoad)
GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
// Add the subregs as sources of the new instruction.
unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
getKillRegState(MO.isKill()));
MIB.addReg(D0, SrcFlags);
if (NumRegs > 1)
MIB.addReg(D1, SrcFlags);
if (NumRegs > 2)
MIB.addReg(D2, SrcFlags);
if (NumRegs > 3)
MIB.addReg(D3, SrcFlags);
// Add the lane number operand.
MIB.addImm(Lane);
OpIdx += 1;
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the super-register source to be an implicit source.
MO.setImplicit(true);
MIB.addOperand(MO);
if (TableEntry->IsLoad)
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
}
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
unsigned OpIdx = 0;
// Transfer the destination register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
if (IsExt)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
if (SrcIsKill) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
}
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
MachineInstrBuilder LO16, HI16;
if (!STI->hasV6T2Ops() &&
(Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
// Expand into a movi + orr.
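// e.g. 0x00ab00cd can be materialized as 0x00ab0000 + 0x000000cd, since
// each part is an 8-bit value rotated by an even amount (a valid so_imm);
// getSOImmTwoPartFirst/Second compute such a split.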
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg).addReg(0);
HI16.addImm(Pred).addReg(PredReg).addReg(0);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
return;
}
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
} else {
LO16Opc = ARM::MOVi16;
HI16Opc = ARM::MOVTi16;
}
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
if (MO.isImm()) {
unsigned Imm = MO.getImm();
unsigned Lo16 = Imm & 0xffff;
unsigned Hi16 = (Imm >> 16) & 0xffff;
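// e.g. Imm = 0x12345678 gives Lo16 = 0x5678 (written by the movw) and
// Hi16 = 0x1234 (written by the movt).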
LO16 = LO16.addImm(Lo16);
HI16 = HI16.addImm(Hi16);
} else {
const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
}
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
}
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default:
return false;
case ARM::VMOVScc:
case ARM::VMOVDcc: {
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
.addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
.addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
case ARM::t2MOVCCr:
case ARM::MOVCCr: {
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
.addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
.addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm())
.addImm(MI.getOperand(4).getImm()) // 'pred'
.addOperand(MI.getOperand(5))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
.addOperand(MI.getOperand(2))
.addOperand(MI.getOperand(3))
.addImm(MI.getOperand(4).getImm())
.addImm(MI.getOperand(5).getImm()) // 'pred'
.addOperand(MI.getOperand(6))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
case ARM::t2MOVCCi16:
case ARM::MOVCCi16: {
unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
.addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
case ARM::t2MOVCCi:
case ARM::MOVCCi: {
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
.addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
case ARM::t2MVNCCi:
case ARM::MVNCCi: {
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
.addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
case ARM::t2MOVCClsl:
case ARM::t2MOVCClsr:
case ARM::t2MOVCCasr:
case ARM::t2MOVCCror: {
unsigned NewOpc;
switch (Opcode) {
case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
default: llvm_unreachable("unexpected conditional move");
}
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
.addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm())
.addImm(MI.getOperand(4).getImm()) // 'pred'
.addOperand(MI.getOperand(5))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
// For functions using a base pointer, we rematerialize it (via the frame
// pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
// for us. Otherwise, expand to nothing.
if (RI.hasBasePointer(MF)) {
int32_t NumBytes = AFI->getFramePtrSpillOffset();
unsigned FramePtr = RI.getFrameRegister(MF);
assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
"base pointer without frame pointer?");
if (AFI->isThumb2Function()) {
emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
} else if (AFI->isThumbFunction()) {
emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
FramePtr, -NumBytes, *TII, RI);
} else {
emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
FramePtr, -NumBytes, ARMCC::AL, 0,
*TII);
}
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(bicOpc), ARM::R6)
.addReg(ARM::R6, RegState::Kill)
.addImm(MaxAlign-1)));
}
}
MI.eraseFromParent();
return true;
}
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
// These are just fancy MOVs instructions.
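// e.g. MOVsrl_flag expands to "movs Rd, Rm, lsr #1", with CPSR added as a
// def so the carry flag receives the bit shifted out.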
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
ARM_AM::lsr : ARM_AM::asr),
1)))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
return true;
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx".
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
.addReg(0);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
case ARM::tTPsoft:
case ARM::TPsoft: {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
.addExternalSymbol("__aeabi_read_tp", 0);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
? ARM::tLDRpci : ARM::t2LDRpci;
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(NewLdOpc), DstReg)
.addOperand(MI.getOperand(1)));
MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::tPICADD))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
case ARM::MOV_ga_dyn:
case ARM::MOV_ga_pcrel:
case ARM::MOV_ga_pcrel_ldr:
case ARM::t2MOV_ga_dyn:
case ARM::t2MOV_ga_pcrel: {
// Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
unsigned LabelId = AFI->createPICLabelUId();
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
const GlobalValue *GV = MO1.getGlobal();
unsigned TF = MO1.getTargetFlags();
bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
unsigned LO16TF = isPIC
? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
unsigned HI16TF = isPIC
? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
unsigned PICAddOpc = isARM
? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
: ARM::tPICADD;
MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(LO16Opc), DstReg)
.addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
.addImm(LabelId);
MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(HI16Opc), DstReg)
.addReg(DstReg)
.addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
.addImm(LabelId);
if (!isPIC) {
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(PICAddOpc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg).addImm(LabelId);
if (isARM) {
AddDefaultPred(MIB3);
if (Opcode == ARM::MOV_ga_pcrel_ldr)
MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
return true;
}
case ARM::MOVi32imm:
case ARM::MOVCCi32imm:
case ARM::t2MOVi32imm:
case ARM::t2MOVCCi32imm:
ExpandMOV32BitImm(MBB, MBBI);
return true;
case ARM::SUBS_PC_LR: {
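// Expands to "subs pc, lr, #imm" (the exception-return idiom); operand 0
// carries the immediate and the CPSR operand marks the flag-setting form.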
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
.addReg(ARM::LR)
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2))
.addReg(ARM::CPSR, RegState::Undef);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
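// e.g. a VLDMQIA of q0 becomes a VLDMDIA of {d0, d1}, with an implicit
// def of q0 so liveness of the Q register is preserved.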
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
// Grab the Q register destination.
bool DstIsDead = MI.getOperand(OpIdx).isDead();
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
// Copy the source register.
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Add the destination operands (D subregs).
unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
case ARM::VSTMQIA: {
unsigned NewOpc = ARM::VSTMDIA;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
// Grab the Q register source.
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
// Copy the destination register.
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Add the source operands (D subregs).
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
MIB.addReg(D0).addReg(D1);
if (SrcIsKill) // Add an implicit kill for the Q register.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
case ARM::VDUPfqf:
case ARM::VDUPfdf:{
unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
ARM::VDUPLN32d;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Lane = TRI->getEncodingValue(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
&ARM::DPR_VFP2RegClass);
// The lane is [0,1] for the containing DReg superregister.
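// e.g. for SrcReg = s1, Lane = 1 and DReg = d0, so the VDUP reads d0[1].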
// Copy the dst/src register operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addReg(DReg);
++OpIdx;
// Add the lane select operand.
MIB.addImm(Lane);
// Add the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
case ARM::VLD2q8PseudoWB_fixed:
case ARM::VLD2q16PseudoWB_fixed:
case ARM::VLD2q32PseudoWB_fixed:
case ARM::VLD2q8PseudoWB_register:
case ARM::VLD2q16PseudoWB_register:
case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64TPseudoWB_fixed:
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
case ARM::VLD3q8oddPseudo:
case ARM::VLD3q16oddPseudo:
case ARM::VLD3q32oddPseudo:
case ARM::VLD3q8oddPseudo_UPD:
case ARM::VLD3q16oddPseudo_UPD:
case ARM::VLD3q32oddPseudo_UPD:
case ARM::VLD4d8Pseudo:
case ARM::VLD4d16Pseudo:
case ARM::VLD4d32Pseudo:
case ARM::VLD1d64QPseudo:
+ case ARM::VLD1d64QPseudoWB_fixed:
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
case ARM::VLD4q8oddPseudo:
case ARM::VLD4q16oddPseudo:
case ARM::VLD4q32oddPseudo:
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo:
case ARM::VLD3DUPd8Pseudo_UPD:
case ARM::VLD3DUPd16Pseudo_UPD:
case ARM::VLD3DUPd32Pseudo_UPD:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
case ARM::VLD4DUPd8Pseudo_UPD:
case ARM::VLD4DUPd16Pseudo_UPD:
case ARM::VLD4DUPd32Pseudo_UPD:
ExpandVLD(MBBI);
return true;
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
case ARM::VST2q8PseudoWB_fixed:
case ARM::VST2q16PseudoWB_fixed:
case ARM::VST2q32PseudoWB_fixed:
case ARM::VST2q8PseudoWB_register:
case ARM::VST2q16PseudoWB_register:
case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
case ARM::VST1d64TPseudo:
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
case ARM::VST1d64TPseudoWB_fixed:
case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
case ARM::VST3q8oddPseudo:
case ARM::VST3q16oddPseudo:
case ARM::VST3q32oddPseudo:
case ARM::VST3q8oddPseudo_UPD:
case ARM::VST3q16oddPseudo_UPD:
case ARM::VST3q32oddPseudo_UPD:
case ARM::VST4d8Pseudo:
case ARM::VST4d16Pseudo:
case ARM::VST4d32Pseudo:
case ARM::VST1d64QPseudo:
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
case ARM::VST1d64QPseudoWB_fixed:
case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
case ARM::VST4q8oddPseudo:
case ARM::VST4q16oddPseudo:
case ARM::VST4q32oddPseudo:
case ARM::VST4q8oddPseudo_UPD:
case ARM::VST4q16oddPseudo_UPD:
case ARM::VST4q32oddPseudo_UPD:
ExpandVST(MBBI);
return true;
case ARM::VLD1LNq8Pseudo:
case ARM::VLD1LNq16Pseudo:
case ARM::VLD1LNq32Pseudo:
case ARM::VLD1LNq8Pseudo_UPD:
case ARM::VLD1LNq16Pseudo_UPD:
case ARM::VLD1LNq32Pseudo_UPD:
case ARM::VLD2LNd8Pseudo:
case ARM::VLD2LNd16Pseudo:
case ARM::VLD2LNd32Pseudo:
case ARM::VLD2LNq16Pseudo:
case ARM::VLD2LNq32Pseudo:
case ARM::VLD2LNd8Pseudo_UPD:
case ARM::VLD2LNd16Pseudo_UPD:
case ARM::VLD2LNd32Pseudo_UPD:
case ARM::VLD2LNq16Pseudo_UPD:
case ARM::VLD2LNq32Pseudo_UPD:
case ARM::VLD3LNd8Pseudo:
case ARM::VLD3LNd16Pseudo:
case ARM::VLD3LNd32Pseudo:
case ARM::VLD3LNq16Pseudo:
case ARM::VLD3LNq32Pseudo:
case ARM::VLD3LNd8Pseudo_UPD:
case ARM::VLD3LNd16Pseudo_UPD:
case ARM::VLD3LNd32Pseudo_UPD:
case ARM::VLD3LNq16Pseudo_UPD:
case ARM::VLD3LNq32Pseudo_UPD:
case ARM::VLD4LNd8Pseudo:
case ARM::VLD4LNd16Pseudo:
case ARM::VLD4LNd32Pseudo:
case ARM::VLD4LNq16Pseudo:
case ARM::VLD4LNq32Pseudo:
case ARM::VLD4LNd8Pseudo_UPD:
case ARM::VLD4LNd16Pseudo_UPD:
case ARM::VLD4LNd32Pseudo_UPD:
case ARM::VLD4LNq16Pseudo_UPD:
case ARM::VLD4LNq32Pseudo_UPD:
case ARM::VST1LNq8Pseudo:
case ARM::VST1LNq16Pseudo:
case ARM::VST1LNq32Pseudo:
case ARM::VST1LNq8Pseudo_UPD:
case ARM::VST1LNq16Pseudo_UPD:
case ARM::VST1LNq32Pseudo_UPD:
case ARM::VST2LNd8Pseudo:
case ARM::VST2LNd16Pseudo:
case ARM::VST2LNd32Pseudo:
case ARM::VST2LNq16Pseudo:
case ARM::VST2LNq32Pseudo:
case ARM::VST2LNd8Pseudo_UPD:
case ARM::VST2LNd16Pseudo_UPD:
case ARM::VST2LNd32Pseudo_UPD:
case ARM::VST2LNq16Pseudo_UPD:
case ARM::VST2LNq32Pseudo_UPD:
case ARM::VST3LNd8Pseudo:
case ARM::VST3LNd16Pseudo:
case ARM::VST3LNd32Pseudo:
case ARM::VST3LNq16Pseudo:
case ARM::VST3LNq32Pseudo:
case ARM::VST3LNd8Pseudo_UPD:
case ARM::VST3LNd16Pseudo_UPD:
case ARM::VST3LNd32Pseudo_UPD:
case ARM::VST3LNq16Pseudo_UPD:
case ARM::VST3LNq32Pseudo_UPD:
case ARM::VST4LNd8Pseudo:
case ARM::VST4LNd16Pseudo:
case ARM::VST4LNd32Pseudo:
case ARM::VST4LNq16Pseudo:
case ARM::VST4LNq32Pseudo:
case ARM::VST4LNd8Pseudo_UPD:
case ARM::VST4LNd16Pseudo_UPD:
case ARM::VST4LNd32Pseudo_UPD:
case ARM::VST4LNq16Pseudo_UPD:
case ARM::VST4LNq32Pseudo_UPD:
ExpandLaneOp(MBBI);
return true;
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
Modified |= ExpandMI(MBB, MBBI);
MBBI = NMBBI;
}
return Modified;
}
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
AFI = MF.getInfo<ARMFunctionInfo>();
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI)
Modified |= ExpandMBB(*MFI);
if (VerifyARMPseudo)
MF.verify(this, "After expanding ARM pseudo instructions.");
return Modified;
}
/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
/// expansion pass.
FunctionPass *llvm::createARMExpandPseudoPass() {
return new ARMExpandPseudo();
}
Index: head/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- head/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (revision 265925)
@@ -1,3527 +1,3584 @@
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
cl::desc("Disable isel of shifter-op"),
cl::init(false));
static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {
enum AddrMode2Type {
AM2_BASE, // Simple AM2 (+-imm12)
AM2_SHOP // Shifter-op AM2
};
class ARMDAGToDAGISel : public SelectionDAGISel {
ARMBaseTargetMachine &TM;
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
public:
explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), TM(tm),
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
virtual const char *getPassName() const {
return "ARM Instruction Selection";
}
virtual void PreprocessISelDAG();
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
SDNode *Select(SDNode *N);
bool hasNoVMLxHazardUse(SDNode *N) const;
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
bool SelectRegShifterOperand(SDValue N, SDValue &A,
SDValue &B, SDValue &C,
bool CheckProfitability = true);
bool SelectImmShifterOperand(SDValue N, SDValue &A,
SDValue &B, bool CheckProfitability = true);
bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
SDValue &B, SDValue &C) {
// Don't apply the profitability check
return SelectRegShifterOperand(N, A, B, C, false);
}
bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
SDValue &B) {
// Don't apply the profitability check
return SelectImmShifterOperand(N, A, B, false);
}
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
}
bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
}
bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
SelectAddrMode2Worker(N, Base, Offset, Opc);
// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
// This always matches one way or another.
return true;
}
bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
const ConstantSDNode *CN = cast<ConstantSDNode>(N);
Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
return true;
}
bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
bool SelectAddrMode3(SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode5(SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
// Thumb Addressing Modes:
bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
unsigned Scale);
bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
// Thumb 2 Addressing Modes:
bool SelectT2ShifterOperandReg(SDValue N,
SDValue &BaseReg, SDValue &Opc);
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
inline bool is_so_imm(unsigned Imm) const {
return ARM_AM::getSOImmVal(Imm) != -1;
}
inline bool is_so_imm_not(unsigned Imm) const {
return ARM_AM::getSOImmVal(~Imm) != -1;
}
inline bool is_t2_so_imm(unsigned Imm) const {
return ARM_AM::getT2SOImmVal(Imm) != -1;
}
inline bool is_t2_so_imm_not(unsigned Imm) const {
return ARM_AM::getT2SOImmVal(~Imm) != -1;
}
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
private:
/// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
/// ARM.
SDNode *SelectARMIndexedLoad(SDNode *N);
SDNode *SelectT2IndexedLoad(SDNode *N);
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes, const uint16_t *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *Opcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
/// generated to force the table registers to be consecutive.
SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
SDNode *SelectInlineAsm(SDNode *N);
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
// Form pairs of consecutive R, S, D, or Q registers.
SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
// Form sequences of 4 consecutive S, D, or Q registers.
SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
};
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
Imm = cast<ConstantSDNode>(N)->getZExtValue();
return true;
}
return false;
}
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
return isInt32Immediate(N.getNode(), Imm);
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
return N->getOpcode() == Opc &&
isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
/// \brief Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
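/// For example, with \p Scale = 4 and the range [-255, 256), a constant
/// node of 1020 succeeds with \p ScaledConstant = 255.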
static bool isScaledConstantInRange(SDValue Node, int Scale,
int RangeMin, int RangeMax,
int &ScaledConstant) {
assert(Scale > 0 && "Invalid scale!");
// Check that this is a constant.
const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
if (!C)
return false;
ScaledConstant = (int) C->getZExtValue();
if ((ScaledConstant % Scale) != 0)
return false;
ScaledConstant /= Scale;
return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
void ARMDAGToDAGISel::PreprocessISelDAG() {
if (!Subtarget->hasV6T2Ops())
return;
bool isThumb2 = Subtarget->isThumb();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
if (N->getOpcode() != ISD::ADD)
continue;
// Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
// leading zeros, followed by consecutive set bits, followed by 1 or 2
// trailing zeros, e.g. 1020.
// Transform the expression to
// (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
// of trailing zeros of c2. The left shift would be folded as an shifter
// operand of 'add' and the 'and' and 'srl' would become a bits extraction
// node (UBFX).
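// For example, with c1 = 14 and c2 = 1020 (0b1111111100): tz = 2 and
// c2 >> tz = 0xff, so (add X1, (and (srl X2, 14), 1020)) becomes
// (add X1, (shl (and (srl X2, 16), 0xff), 2)); the and/srl pair then
// selects as "ubfx r, X2, #16, #8" and the shl folds into the add.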
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned And_imm = 0;
if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
std::swap(N0, N1);
}
if (!And_imm)
continue;
// Check if the AND mask is an immediate of the form: 000.....1111111100
unsigned TZ = countTrailingZeros(And_imm);
if (TZ != 1 && TZ != 2)
// Be conservative here. Shifter operands aren't always free. e.g. on
// Swift, a left shift by 1 or 2 is free but other shift amounts are not.
// e.g.
// ubfx r3, r1, #16, #8
// ldr.w r3, [r0, r3, lsl #2]
// vs.
// mov.w r9, #1020
// and.w r2, r9, r1, lsr #14
// ldr r2, [r0, r2]
continue;
And_imm >>= TZ;
if (And_imm & (And_imm + 1))
continue;
// Look for (and (srl X, c1), c2).
SDValue Srl = N1.getOperand(0);
unsigned Srl_imm = 0;
if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
(Srl_imm <= 2))
continue;
// Make sure first operand is not a shifter operand which would prevent
// folding of the left shift.
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
if (isThumb2) {
if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
continue;
} else {
if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
continue;
}
// Now make the transformation.
Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
Srl.getOperand(0),
CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
Srl, CurDAG->getConstant(And_imm, MVT::i32));
N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, MVT::i32));
CurDAG->UpdateNodeOperands(N, N0, N1);
}
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (OptLevel == CodeGenOpt::None)
return true;
if (!CheckVMLxHazard)
return true;
if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() &&
!Subtarget->isSwift())
return true;
if (!N->hasOneUse())
return false;
SDNode *Use = *N->use_begin();
if (Use->getOpcode() == ISD::CopyToReg)
return true;
if (Use->isMachineOpcode()) {
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
if (MCID.mayStore())
return true;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
return true;
// vmlx feeding into another vmlx. We actually want to unfold
// the use later in the MLxExpansion pass. e.g.
// vmla
// vmla (stall 8 cycles)
//
// vmul (5 cycles)
// vadd (5 cycles)
// vmla
// This adds up to about 18 - 19 cycles.
//
// vmla
// vmul (stall 4 cycles)
// vadd
// This adds up to about 14 cycles.
return TII->isFpMLxInstruction(Opcode);
}
return false;
}
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal,
unsigned ShAmt) {
if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
return true;
if (Shift.hasOneUse())
return true;
// R << 2 is free.
return ShOpcVal == ARM_AM::lsl &&
(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &Opc,
bool CheckProfitability) {
if (DisableShifterOp)
return false;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
if (ShOpcVal == ARM_AM::no_shift) return false;
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!RHS) return false;
ShImmVal = RHS->getZExtValue() & 31;
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc,
bool CheckProfitability) {
if (DisableShifterOp)
return false;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
if (ShOpcVal == ARM_AM::no_shift) return false;
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (RHS) return false;
ShReg = N.getOperand(1);
if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
return false;
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
SDValue &Base,
SDValue &OffImm) {
// Match simple R + imm12 operands.
// Base only.
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
} else
Base = N;
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
// Base only.
Base = N;
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
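// e.g. X * 9 becomes X + (X << 3): Base and Offset are both X, with an
// LSL #3 shifter operand.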
int RHSC = (int)RHS->getZExtValue();
if (RHSC & 1) {
RHSC = RHSC & ~1;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = - RHSC;
}
if (isPowerOf2_32(RHSC)) {
unsigned ShAmt = Log2_32(RHSC);
Base = Offset = N.getOperand(0);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
ARM_AM::lsl),
MVT::i32);
return true;
}
}
}
}
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
// ISD::OR that is equivalent to an ISD::ADD.
!CurDAG->isBaseWithConstantOffset(N))
return false;
// Leave simple R +/- imm12 operands for LDRi12
if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
-0x1000+1, 0x1000, RHSC)) // 12 bits.
return false;
}
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
ARM_AM::ShiftOpc ShOpcVal =
ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
unsigned ShAmt = 0;
Base = N.getOperand(0);
Offset = N.getOperand(1);
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
// it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
Offset = N.getOperand(1).getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isLikeA9() || Subtarget->isSwift() ||
N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
MVT::i32);
return true;
}
//-----
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
SDValue &Base,
SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
(!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
if (RHSC & 1) {
RHSC = RHSC & ~1;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = - RHSC;
}
if (isPowerOf2_32(RHSC)) {
unsigned ShAmt = Log2_32(RHSC);
Base = Offset = N.getOperand(0);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
ARM_AM::lsl),
MVT::i32);
return AM2_SHOP;
}
}
}
}
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
// ISD::OR that is equivalent to an ADD.
!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
MVT::i32);
return AM2_BASE;
}
// Match simple R +/- imm12 operands.
if (N.getOpcode() != ISD::SUB) {
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
-0x1000+1, 0x1000, RHSC)) { // 12 bits.
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = - RHSC;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
ARM_AM::no_shift),
MVT::i32);
return AM2_BASE;
}
}
if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
MVT::i32);
return AM2_BASE;
}
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal =
ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
unsigned ShAmt = 0;
Base = N.getOperand(0);
Offset = N.getOperand(1);
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
// it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
Offset = N.getOperand(1).getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isLikeA9() || Subtarget->isSwift() ||
N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
MVT::i32);
return AM2_SHOP;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
int Val;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
return false;
Offset = N;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
unsigned ShAmt = 0;
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
Offset = N.getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
int Val;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
if (AddSub == ARM_AM::sub) Val *= -1;
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(Val, MVT::i32);
return true;
}
return false;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
int Val;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
ARM_AM::no_shift),
MVT::i32);
return true;
}
return false;
}
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
Base = N;
return true;
}
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::SUB) {
// X - C is canonicalized to X + -C, no need to handle it here.
Base = N.getOperand(0);
Offset = N.getOperand(1);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
return true;
}
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
return true;
}
// If the RHS is +/- imm8, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
-256 + 1, 256, RHSC)) { // 8 bits.
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = -RHSC;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
return true;
}
Base = N.getOperand(0);
Offset = N.getOperand(1);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
int Val;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
return true;
}
Offset = N;
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
SDValue &Base, SDValue &Offset) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
MVT::i32);
return true;
}
// If the RHS is +/- imm8, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
-256 + 1, 256, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = -RHSC;
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
MVT::i32);
return true;
}
Base = N;
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
MVT::i32);
return true;
}
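/// SelectAddrMode6 - Match an addressing-mode-6 operand (address register
/// plus an alignment hint) for NEON load/store instructions.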
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
SDValue &Align) {
Addr = N;
unsigned Alignment = 0;
if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
// The maximum alignment is equal to the memory size being referenced.
unsigned LSNAlign = LSN->getAlignment();
unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
if (LSNAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
// the raw alignment value; it will be refined later based on the legal
// alignment operands for the intrinsic.
Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
}
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
SDValue &Offset) {
LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
ISD::MemIndexedMode AM = LdSt->getAddressingMode();
if (AM != ISD::POST_INC)
return false;
Offset = N;
if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
Offset = CurDAG->getRegister(0, MVT::i32);
}
return true;
}
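/// SelectAddrModePC - Match a single-use PIC_ADD node, splitting it into
/// the PC-relative offset and the constant-pool label it references.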
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
SDValue N1 = N.getOperand(1);
Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
MVT::i32);
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//
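/// SelectThumbAddrModeRR - Match a Thumb1 register-plus-register address.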
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
SDValue &Base, SDValue &Offset) {
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
if (!NC || !NC->isNullValue())
return false;
Base = Offset = N;
return true;
}
Base = N.getOperand(0);
Offset = N.getOperand(1);
return true;
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
SDValue &Offset, unsigned Scale) {
if (Scale == 4) {
SDValue TmpBase, TmpOffImm;
if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
return false; // We want to select tLDRspi / tSTRspi instead.
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
return false; // We want to select tLDRpci instead.
}
if (!CurDAG->isBaseWithConstantOffset(N))
return false;
// Thumb does not have [sp, r] address mode.
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
if ((LHSR && LHSR->getReg() == ARM::SP) ||
(RHSR && RHSR->getReg() == ARM::SP))
return false;
// FIXME: Why do we explicitly check for a match here and then return false?
// Presumably to allow something else to match, but shouldn't this be
// documented?
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
return false;
Base = N.getOperand(0);
Offset = N.getOperand(1);
return true;
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
SDValue &Base,
SDValue &Offset) {
return SelectThumbAddrModeRI(N, Base, Offset, 1);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
SDValue &Base,
SDValue &Offset) {
return SelectThumbAddrModeRI(N, Base, Offset, 2);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
SDValue &Base,
SDValue &Offset) {
return SelectThumbAddrModeRI(N, Base, Offset, 4);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm) {
if (Scale == 4) {
SDValue TmpBase, TmpOffImm;
if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
return false; // We want to select tLDRspi / tSTRspi instead.
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
return false; // We want to select tLDRpci instead.
}
if (!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
} else {
Base = N;
}
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
if ((LHSR && LHSR->getReg() == ARM::SP) ||
(RHSR && RHSR->getReg() == ARM::SP)) {
ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
// Thumb does not have [sp, #imm5] address mode for non-zero imm5.
if (LHSC != 0 || RHSC != 0) return false;
Base = N;
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
// If the RHS is + imm5 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
SDValue &OffImm) {
return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
SDValue &OffImm) {
return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
SDValue &OffImm) {
return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
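/// SelectThumbAddrModeSP - Match an SP- or frame-index-relative address
/// with a word-scaled unsigned imm8 offset (tLDRspi / tSTRspi).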
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
if (!CurDAG->isBaseWithConstantOffset(N))
return false;
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
(LHSR && LHSR->getReg() == ARM::SP)) {
// If the RHS is + imm8 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//
bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
SDValue &Opc) {
if (DisableShifterOp)
return false;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
if (ShOpcVal == ARM_AM::no_shift) return false;
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShImmVal = RHS->getZExtValue() & 31;
Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
return true;
}
return false;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
// Base only.
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
} else
Base = N;
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
if (SelectT2AddrModeImm8(N, Base, OffImm))
// Let t2LDRi8 handle (R - imm8).
return false;
int RHSC = (int)RHS->getZExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
// Base only.
Base = N;
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N))
return false;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getSExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
return false;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm){
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
int RHSC;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
? CurDAG->getTargetConstant(RHSC, MVT::i32)
: CurDAG->getTargetConstant(-RHSC, MVT::i32);
return true;
}
return false;
}
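/// SelectT2AddrModeSoReg - Match a Thumb2 register-plus-shifted-register
/// address: (R + R) or (R + (R << [1,2,3])).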
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
return false;
// Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
return false;
else if (RHSC < 0 && RHSC >= -255) // 8 bits
return false;
}
// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
OffReg = N.getOperand(1);
// Swap if it is ((R << c) + R).
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
if (ShOpcVal != ARM_AM::lsl) {
ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
if (ShOpcVal == ARM_AM::lsl)
std::swap(Base, OffReg);
}
if (ShOpcVal == ARM_AM::lsl) {
// Check to see if the RHS of the shift is a constant; if not, we can't fold
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
OffReg = OffReg.getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
SDValue &OffImm) {
// This *must* succeed since it's used for the irreplaceable ldrex and strex
// instructions.
Base = N;
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
return true;
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!RHS)
return true;
uint32_t RHSC = (int)RHS->getZExtValue();
if (RHSC > 1020 || RHSC % 4 != 0)
return true;
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
return true;
}
//===--------------------------------------------------------------------===//
/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG) {
return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
}
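/// SelectARMIndexedLoad - Select a pre- or post-indexed ARM-mode load,
/// choosing between the immediate and register forms of LDR/LDRB/LDRH/LDRSH
/// based on the loaded type and the offset operand.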
SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
return NULL;
EVT LoadedVT = LD->getMemoryVT();
SDValue Offset, AMOpc;
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
bool Match = false;
if (LoadedVT == MVT::i32 && isPre &&
SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
Opcode = ARM::LDR_PRE_IMM;
Match = true;
} else if (LoadedVT == MVT::i32 && !isPre &&
SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
Opcode = ARM::LDR_POST_IMM;
Match = true;
} else if (LoadedVT == MVT::i32 &&
SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
Match = true;
} else if (LoadedVT == MVT::i16 &&
SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
: (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
} else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
if (LD->getExtensionType() == ISD::SEXTLOAD) {
if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
}
} else {
if (isPre &&
SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = ARM::LDRB_PRE_IMM;
} else if (!isPre &&
SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = ARM::LDRB_POST_IMM;
} else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
}
}
}
if (Match) {
if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
}
}
return NULL;
}
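/// SelectT2IndexedLoad - Thumb2 counterpart of SelectARMIndexedLoad; selects
/// a pre- or post-indexed t2LDR variant using an imm8 offset.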
SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
return NULL;
EVT LoadedVT = LD->getMemoryVT();
bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
SDValue Offset;
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
bool Match = false;
if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
switch (LoadedVT.getSimpleVT().SimpleTy) {
case MVT::i32:
Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
break;
case MVT::i16:
if (isSExtLd)
Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
else
Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
break;
case MVT::i8:
case MVT::i1:
if (isSExtLd)
Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
else
Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
break;
default:
return NULL;
}
Match = true;
}
if (Match) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
MVT::Other, Ops);
}
return NULL;
}
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
bool is64BitVector) {
unsigned NumRegs = NumVecs;
if (!is64BitVector && NumVecs < 3)
NumRegs *= 2;
unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
if (Alignment >= 32 && NumRegs == 4)
Alignment = 32;
else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
Alignment = 16;
else if (Alignment >= 8)
Alignment = 8;
else
Alignment = 0;
return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
+static bool isVLDfixed(unsigned Opc)
+{
+ switch (Opc) {
+ default: return false;
+ case ARM::VLD1d8wb_fixed : return true;
+ case ARM::VLD1d16wb_fixed : return true;
+ case ARM::VLD1d64Qwb_fixed : return true;
+ case ARM::VLD1d32wb_fixed : return true;
+ case ARM::VLD1d64wb_fixed : return true;
+ case ARM::VLD1d64TPseudoWB_fixed : return true;
+ case ARM::VLD1d64QPseudoWB_fixed : return true;
+ case ARM::VLD1q8wb_fixed : return true;
+ case ARM::VLD1q16wb_fixed : return true;
+ case ARM::VLD1q32wb_fixed : return true;
+ case ARM::VLD1q64wb_fixed : return true;
+ case ARM::VLD2d8wb_fixed : return true;
+ case ARM::VLD2d16wb_fixed : return true;
+ case ARM::VLD2d32wb_fixed : return true;
+ case ARM::VLD2q8PseudoWB_fixed : return true;
+ case ARM::VLD2q16PseudoWB_fixed : return true;
+ case ARM::VLD2q32PseudoWB_fixed : return true;
+ case ARM::VLD2DUPd8wb_fixed : return true;
+ case ARM::VLD2DUPd16wb_fixed : return true;
+ case ARM::VLD2DUPd32wb_fixed : return true;
+ }
+}
+
+static bool isVSTfixed(unsigned Opc)
+{
+ switch (Opc) {
+ default: return false;
+ case ARM::VST1d8wb_fixed : return true;
+ case ARM::VST1d16wb_fixed : return true;
+ case ARM::VST1d32wb_fixed : return true;
+ case ARM::VST1d64wb_fixed : return true;
+ case ARM::VST1q8wb_fixed : return true;
+ case ARM::VST1q16wb_fixed : return true;
+ case ARM::VST1q32wb_fixed : return true;
+ case ARM::VST1q64wb_fixed : return true;
+ case ARM::VST1d64TPseudoWB_fixed : return true;
+ case ARM::VST1d64QPseudoWB_fixed : return true;
+ case ARM::VST2d8wb_fixed : return true;
+ case ARM::VST2d16wb_fixed : return true;
+ case ARM::VST2d32wb_fixed : return true;
+ case ARM::VST2q8PseudoWB_fixed : return true;
+ case ARM::VST2q16PseudoWB_fixed : return true;
+ case ARM::VST2q32PseudoWB_fixed : return true;
+ }
+}
+
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ assert((isVLDfixed(Opc) || isVSTfixed(Opc))
+ && "Incorrect fixed stride updating instruction.");
switch (Opc) {
default: break;
case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+ case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
+ case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
+ case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
+ case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
}
return Opc; // If not one we handle, return it unchanged.
}
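/// SelectVLD - Select a NEON load of 1 to 4 vectors. Double registers and
/// VLD1/VLD2 quad registers map to a single instruction; VLD3/VLD4 of quad
/// registers are split into two instructions that load the even and odd D
/// subregisters.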
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
bool is64BitVector = VT.is64BitVector();
Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1i64: OpcodeIndex = 3; break;
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
break;
}
EVT ResTy;
if (NumVecs == 1)
ResTy = VT;
else {
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
if (!is64BitVector)
ResTyElts *= 2;
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
}
std::vector<EVT> ResTys;
ResTys.push_back(ResTy);
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDNode *VLd;
SmallVector<SDValue, 7> Ops;
// Double registers and VLD1/VLD2 quad registers are directly supported.
if (is64BitVector || NumVecs <= 2) {
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes0[OpcodeIndex]);
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
EVT AddrTy = MemAddr.getValueType();
// Load the even subregs. This is always an updating load, so that it
// provides the address to the second load for the odd subregs.
SDValue ImplDef =
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
ResTy, AddrTy, MVT::Other, OpsA);
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Ops.push_back(SDValue(VLdA, 1));
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
assert(isa<ConstantSDNode>(Inc.getNode()) &&
"only constant post-increment update allowed for VLD3/4");
(void)Inc;
Ops.push_back(Reg0);
}
Ops.push_back(SDValue(VLdA, 0));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
}
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
if (NumVecs == 1)
return VLd;
// Extract out the subregisters.
SDValue SuperReg = SDValue(VLd, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 &&
ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
return NULL;
}
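/// SelectVST - Select a NEON store of 1 to 4 vectors, mirroring SelectVLD:
/// quad-register VST3/VST4 are split into an even-register store followed by
/// an odd-register store.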
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vst type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1i64: OpcodeIndex = 3; break;
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VST1");
break;
}
std::vector<EVT> ResTys;
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 7> Ops;
// Double registers and VST1/VST2 quad registers are directly supported.
if (is64BitVector || NumVecs <= 2) {
SDValue SrcReg;
if (NumVecs == 1) {
SrcReg = N->getOperand(Vec0Idx);
} else if (is64BitVector) {
// Form a REG_SEQUENCE to force register allocation.
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
// If it's a vst3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(Vec0Idx + 3);
SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(Vec0Idx);
SDValue Q1 = N->getOperand(Vec0Idx + 1);
SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
}
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes0[OpcodeIndex]);
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
- !isa<ConstantSDNode>(Inc.getNode()))
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ else if (NumVecs > 2 && !isVSTfixed(Opc))
+ Ops.push_back(Reg0);
}
Ops.push_back(SrcReg);
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
return VSt;
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers. This is always an updating store, so that it
// provides the address to the second store for the odd subregs.
const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
MVT::Other, OpsA);
cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops.push_back(SDValue(VStA, 0));
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
assert(isa<ConstantSDNode>(Inc.getNode()) &&
"only constant post-increment update allowed for VST3/4");
(void)Inc;
Ops.push_back(Reg0);
}
Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
Ops);
cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
return VStB;
}
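/// SelectVLDSTLane - Select a NEON load or store of one lane across 2 to 4
/// vector registers (VLDn-lane / VSTn-lane).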
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes) {
assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
unsigned Lane =
cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
unsigned Alignment = 0;
if (NumVecs != 3) {
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
if (Alignment > NumBytes)
Alignment = NumBytes;
if (Alignment < 8 && Alignment < NumBytes)
Alignment = 0;
// Alignment must be a power of two; make sure of that.
Alignment = (Alignment & -Alignment);
if (Alignment == 1)
Alignment = 0;
}
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld/vst lane type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
// Quad-register operations:
case MVT::v8i16: OpcodeIndex = 0; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 1; break;
}
std::vector<EVT> ResTys;
if (IsLoad) {
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
if (!is64BitVector)
ResTyElts *= 2;
ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
MVT::i64, ResTyElts));
}
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
SDValue SuperReg;
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2) {
if (is64BitVector)
SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else
SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
if (is64BitVector)
SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
else
SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
if (!IsLoad)
return VLdLn;
// Extract the subregisters.
SuperReg = SDValue(VLdLn, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 &&
ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
return NULL;
}
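/// SelectVLDDup - Select a NEON load that duplicates a single element into
/// all lanes of 2 to 4 vector registers (VLDn-dup).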
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >= 2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
unsigned Alignment = 0;
if (NumVecs != 3) {
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
if (Alignment > NumBytes)
Alignment = NumBytes;
if (Alignment < 8 && Alignment < NumBytes)
Alignment = 0;
// Alignment must be a power of two; make sure of that.
Alignment = (Alignment & -Alignment);
if (Alignment == 1)
Alignment = 0;
}
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld-dup type");
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
}
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue SuperReg;
unsigned Opc = Opcodes[OpcodeIndex];
SmallVector<SDValue, 6> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
// Fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
if (!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
else if (NumVecs > 2)
Ops.push_back(Reg0);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
std::vector<EVT> ResTys;
ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
SuperReg = SDValue(VLdDup, 0);
// Extract the subregisters.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
unsigned SubIdx = ARM::dsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
return NULL;
}
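/// SelectVTBL - Select a NEON table lookup (VTBL) or table extension (VTBX),
/// first forming the table registers into a REG_SEQUENCE.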
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
unsigned Opc) {
assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
SDLoc dl(N);
EVT VT = N->getValueType(0);
unsigned FirstTblReg = IsExt ? 2 : 1;
// Form a REG_SEQUENCE to force register allocation.
SDValue RegSeq;
SDValue V0 = N->getOperand(FirstTblReg + 0);
SDValue V1 = N->getOperand(FirstTblReg + 1);
if (NumVecs == 2)
RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
// If it's a vtbl3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(FirstTblReg + 3);
RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
SmallVector<SDValue, 6> Ops;
if (IsExt)
Ops.push_back(N->getOperand(1));
Ops.push_back(RegSeq);
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
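/// SelectV6T2BitfieldExtractOp - Try to match an AND-of-shift or
/// shift-of-shift pattern as a v6t2 bitfield extract (SBFX/UBFX), or as a
/// plain shift when the field extends to the top bit.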
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return NULL;
unsigned Opc = isSigned
? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
// For unsigned extracts, check for a shift right and mask
unsigned And_imm = 0;
if (N->getOpcode() == ISD::AND) {
if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
// The immediate is a mask of the low bits iff imm & (imm+1) == 0
if (And_imm & (And_imm + 1))
return NULL;
unsigned Srl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
// Note: The width operand is encoded as width-1.
unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
// It's cheaper to use a right shift to extract the top bits.
if (Subtarget->isThumb()) {
Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
// ARM models shift instructions as MOVsi with shifter operand.
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
SDValue ShOpc =
CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
}
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
getAL(CurDAG), Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
return NULL;
}
// Otherwise, we're looking for a shift of a shift
unsigned Shl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
unsigned Srl_imm = 0;
if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
// Note: The width operand is encoded as width-1.
unsigned Width = 32 - Srl_imm - 1;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
return NULL;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
getAL(CurDAG), Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
return NULL;
}
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
/// select_cc setgt X, -1, X, -X
/// select_cc setl[te] X, 0, -X, X
/// select_cc setlt X, 1, -X, X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc0 = N->getOperand(0);
SDValue XORSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
return NULL;
if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
return NULL;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
SDValue ADDSrc1 = XORSrc0.getOperand(1);
SDValue SRASrc0 = XORSrc1.getOperand(0);
SDValue SRASrc1 = XORSrc1.getOperand(1);
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
XType.isInteger() && SRAConstant != NULL &&
Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
return NULL;
}
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
unsigned Op16,unsigned Op32,
unsigned Op64) {
// Mostly direct translation to the given operations, except that we preserve
// the AtomicOrdering for use later on.
AtomicSDNode *AN = cast<AtomicSDNode>(Node);
EVT VT = AN->getMemoryVT();
unsigned Op;
SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
if (VT == MVT::i8)
Op = Op8;
else if (VT == MVT::i16)
Op = Op16;
else if (VT == MVT::i32)
Op = Op32;
else if (VT == MVT::i64) {
Op = Op64;
VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
} else
llvm_unreachable("Unexpected atomic operation");
SmallVector<SDValue, 6> Ops;
for (unsigned i = 1; i < AN->getNumOperands(); ++i)
Ops.push_back(AN->getOperand(i));
Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
Ops.push_back(AN->getOperand(0)); // Chain moves to the end
return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
}
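/// Select - Main instruction selection hook: handles the node kinds that
/// need custom ARM selection and defers everything else to the autogenerated
/// matcher.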
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return NULL; // Already selected.
}
switch (N->getOpcode()) {
default: break;
case ISD::INLINEASM: {
SDNode *ResNode = SelectInlineAsm(N);
if (ResNode)
return ResNode;
break;
}
case ISD::XOR: {
// Select special operations if XOR node forms integer ABS pattern
SDNode *ResNode = SelectABSOp(N);
if (ResNode)
return ResNode;
// Other cases are autogenerated.
break;
}
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
if (Subtarget->hasThumb2())
// Thumb2-aware targets have the MOVT instruction, so all immediates can
// be done with MOV + MOVT, at worst.
UseCP = false;
else {
if (Subtarget->isThumb()) {
UseCP = (Val > 255 && // MOV
~Val > 255 && // MOV + MVN
!ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
} else
UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
ARM_AM::getSOImmVal(~Val) == -1 && // MVN
!ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
}
if (UseCP) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
getTargetLowering()->getPointerTy());
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
Ops);
} else {
SDValue Ops[] = {
CPIdx,
CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
Ops);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
}
// Other cases are autogenerated.
break;
}
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
if (Subtarget->isThumb1Only()) {
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
case ISD::SRL:
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
return I;
break;
case ISD::SRA:
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
return I;
break;
case ISD::MUL:
if (Subtarget->isThumb1Only())
break;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned RHSV = C->getZExtValue();
if (!RHSV) break;
if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
unsigned ShImm = Log2_32(RHSV-1);
if (ShImm >= 32)
break;
SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
unsigned ShImm = Log2_32(RHSV+1);
if (ShImm >= 32)
break;
SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
}
}
}
break;
case ISD::AND: {
// Check for unsigned bitfield extract
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
return I;
// (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
// 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the top
// 16 bits of the result come entirely from c2 and the lower 16 bits come
// entirely from x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
// Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
break;
unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
? ARM::t2MOVTi16
: (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
if (!Opc)
break;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (!N1C)
break;
if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
SDValue N2 = N0.getOperand(1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (!N2C)
break;
unsigned N1CVal = N1C->getZExtValue();
unsigned N2CVal = N2C->getZExtValue();
if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
(N1CVal & 0xffffU) == 0xffffU &&
(N2CVal & 0xffffU) == 0x0U) {
SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
}
break;
}
case ARMISD::VMOVRRD:
return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
N->getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMULL : ARM::UMULLv5,
dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::SMUL_LOHI: {
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMULL : ARM::SMULLv5,
dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::UMLAL: {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMLAL : ARM::UMLALv5,
dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::SMLAL: {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5,
dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::LOAD: {
SDNode *ResNode = 0;
if (Subtarget->isThumb() && Subtarget->hasThumb2())
ResNode = SelectT2IndexedLoad(N);
else
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
// Other cases are autogenerated.
break;
}
case ARMISD::BRCOND: {
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
unsigned Opc = Subtarget->isThumb() ?
((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDValue N3 = N->getOperand(3);
SDValue InFlag = N->getOperand(4);
assert(N1.getOpcode() == ISD::BasicBlock);
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
cast<ConstantSDNode>(N2)->getZExtValue()),
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
MVT::Glue, Ops);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
ReplaceUses(SDValue(N, 1), InFlag);
}
ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VUZP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VTRN: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VTRNq8; break;
case MVT::v8i16: Opc = ARM::VTRNq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
EVT EltVT = VecVT.getVectorElementType();
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2)
return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3));
}
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
case ARMISD::VLD3DUP: {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo };
return SelectVLDDup(N, false, 3, Opcodes);
}
case ARMISD::VLD4DUP: {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo };
return SelectVLDDup(N, false, 4, Opcodes);
}
case ARMISD::VLD2DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
case ARMISD::VLD3DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
ARM::VLD3DUPd16Pseudo_UPD,
ARM::VLD3DUPd32Pseudo_UPD };
return SelectVLDDup(N, true, 3, Opcodes);
}
case ARMISD::VLD4DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
ARM::VLD4DUPd16Pseudo_UPD,
ARM::VLD4DUPd32Pseudo_UPD };
return SelectVLDDup(N, true, 4, Opcodes);
}
case ARMISD::VLD1_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
ARM::VLD1d16wb_fixed,
ARM::VLD1d32wb_fixed,
ARM::VLD1d64wb_fixed };
static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
ARM::VLD1q16wb_fixed,
ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
ARM::VLD2d16wb_fixed,
ARM::VLD2d32wb_fixed,
ARM::VLD1q64wb_fixed};
static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD3_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
ARM::VLD3d16Pseudo_UPD,
ARM::VLD3d32Pseudo_UPD,
- ARM::VLD1q64wb_fixed};
+ ARM::VLD1d64TPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
ARM::VLD3q16oddPseudo_UPD,
ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VLD4_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
ARM::VLD4d16Pseudo_UPD,
ARM::VLD4d32Pseudo_UPD,
- ARM::VLD1q64wb_fixed};
+ ARM::VLD1d64QPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
ARM::VLD4q16oddPseudo_UPD,
ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VLD2LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
ARM::VLD2LNd16Pseudo_UPD,
ARM::VLD2LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
ARM::VLD2LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
}
case ARMISD::VLD3LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
ARM::VLD3LNd16Pseudo_UPD,
ARM::VLD3LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
ARM::VLD3LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
}
case ARMISD::VLD4LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
ARM::VLD4LNd16Pseudo_UPD,
ARM::VLD4LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
ARM::VLD4LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
}
case ARMISD::VST1_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
ARM::VST1d16wb_fixed,
ARM::VST1d32wb_fixed,
ARM::VST1d64wb_fixed };
static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
ARM::VST2d16wb_fixed,
ARM::VST2d32wb_fixed,
ARM::VST1q64wb_fixed};
static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
ARM::VST3d16Pseudo_UPD,
ARM::VST3d32Pseudo_UPD,
ARM::VST1d64TPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
ARM::VST3q16oddPseudo_UPD,
ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VST4_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
ARM::VST4d16Pseudo_UPD,
ARM::VST4d32Pseudo_UPD,
ARM::VST1d64QPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
ARM::VST4q16oddPseudo_UPD,
ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VST2LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
ARM::VST2LNd16Pseudo_UPD,
ARM::VST2LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
ARM::VST2LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
}
case ARMISD::VST3LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
ARM::VST3LNd16Pseudo_UPD,
ARM::VST3LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
ARM::VST3LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
}
case ARMISD::VST4LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
ARM::VST4LNd16Pseudo_UPD,
ARM::VST4LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
ARM::VST4LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::arm_ldrexd: {
SDValue MemAddr = N->getOperand(2);
SDLoc dl(N);
SDValue Chain = N->getOperand(0);
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
unsigned NewOpc = isThumb ? ARM::t2LDREXD : ARM::LDREXD;
// arm_ldrexd returns an i64 value in {i32, i32}.
std::vector<EVT> ResTys;
if (isThumb) {
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::i32);
} else
ResTys.push_back(MVT::Untyped);
ResTys.push_back(MVT::Other);
// Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
// Remap uses.
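// For Thumb2 the node was built as {i32, i32, chain}; for ARM it is
// {Untyped GPRPair, chain}, so the index of the chain result differs.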
SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
SDValue Result;
if (isThumb)
Result = SDValue(Ld, 0);
else {
SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode, 0);
}
ReplaceUses(SDValue(N, 0), Result);
}
if (!SDValue(N, 1).use_empty()) {
SDValue Result;
if (isThumb)
Result = SDValue(Ld, 1);
else {
SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode, 0);
}
ReplaceUses(SDValue(N, 1), Result);
}
ReplaceUses(SDValue(N, 2), OutChain);
return NULL;
}
case Intrinsic::arm_strexd: {
SDLoc dl(N);
SDValue Chain = N->getOperand(0);
SDValue Val0 = N->getOperand(2);
SDValue Val1 = N->getOperand(3);
SDValue MemAddr = N->getOperand(4);
// Store exclusive double returns an i32 value which is the status of
// the issued store.
EVT ResTys[] = { MVT::i32, MVT::Other };
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
if (isThumb) {
Ops.push_back(Val0);
Ops.push_back(Val1);
} else
// arm_strexd uses GPRPair.
Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
return St;
}
case Intrinsic::arm_neon_vld1: {
static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
ARM::VLD2d32, ARM::VLD1q64 };
static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
ARM::VLD3d16Pseudo,
ARM::VLD3d32Pseudo,
ARM::VLD1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
ARM::VLD3q16oddPseudo,
ARM::VLD3q32oddPseudo };
return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
ARM::VLD4d16Pseudo,
ARM::VLD4d32Pseudo,
ARM::VLD1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
ARM::VLD4q16oddPseudo,
ARM::VLD4q32oddPseudo };
return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
ARM::VLD2LNd16Pseudo,
ARM::VLD2LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
ARM::VLD2LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
ARM::VLD3LNd16Pseudo,
ARM::VLD3LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
ARM::VLD3LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
ARM::VLD4LNd16Pseudo,
ARM::VLD4LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
ARM::VLD4LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST1q64 };
static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
ARM::VST3d16Pseudo,
ARM::VST3d32Pseudo,
ARM::VST1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
ARM::VST3q16oddPseudo,
ARM::VST3q32oddPseudo };
return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
ARM::VST4d16Pseudo,
ARM::VST4d32Pseudo,
ARM::VST1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
ARM::VST4q16oddPseudo,
ARM::VST4q32oddPseudo };
return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
ARM::VST2LNd16Pseudo,
ARM::VST2LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
ARM::VST2LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
ARM::VST3LNd16Pseudo,
ARM::VST3LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
ARM::VST3LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
ARM::VST4LNd16Pseudo,
ARM::VST4LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
ARM::VST4LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::arm_neon_vtbl2:
return SelectVTBL(N, false, 2, ARM::VTBL2);
case Intrinsic::arm_neon_vtbl3:
return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
return SelectVTBL(N, true, 2, ARM::VTBX2);
case Intrinsic::arm_neon_vtbx3:
return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
}
break;
}
case ARMISD::VTBL1: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 6> Ops;
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(1));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
case ARMISD::VTBL2: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Form a REG_SEQUENCE to force register allocation.
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
SmallVector<SDValue, 6> Ops;
Ops.push_back(RegSeq);
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
}
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
case ISD::ATOMIC_LOAD:
if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
else
break;
case ISD::ATOMIC_STORE:
if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64);
else
break;
case ISD::ATOMIC_LOAD_ADD:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_ADD_I8,
ARM::ATOMIC_LOAD_ADD_I16,
ARM::ATOMIC_LOAD_ADD_I32,
ARM::ATOMIC_LOAD_ADD_I64);
case ISD::ATOMIC_LOAD_SUB:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_SUB_I8,
ARM::ATOMIC_LOAD_SUB_I16,
ARM::ATOMIC_LOAD_SUB_I32,
ARM::ATOMIC_LOAD_SUB_I64);
case ISD::ATOMIC_LOAD_AND:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_AND_I8,
ARM::ATOMIC_LOAD_AND_I16,
ARM::ATOMIC_LOAD_AND_I32,
ARM::ATOMIC_LOAD_AND_I64);
case ISD::ATOMIC_LOAD_OR:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_OR_I8,
ARM::ATOMIC_LOAD_OR_I16,
ARM::ATOMIC_LOAD_OR_I32,
ARM::ATOMIC_LOAD_OR_I64);
case ISD::ATOMIC_LOAD_XOR:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_XOR_I8,
ARM::ATOMIC_LOAD_XOR_I16,
ARM::ATOMIC_LOAD_XOR_I32,
ARM::ATOMIC_LOAD_XOR_I64);
case ISD::ATOMIC_LOAD_NAND:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_NAND_I8,
ARM::ATOMIC_LOAD_NAND_I16,
ARM::ATOMIC_LOAD_NAND_I32,
ARM::ATOMIC_LOAD_NAND_I64);
case ISD::ATOMIC_LOAD_MIN:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_MIN_I8,
ARM::ATOMIC_LOAD_MIN_I16,
ARM::ATOMIC_LOAD_MIN_I32,
ARM::ATOMIC_LOAD_MIN_I64);
case ISD::ATOMIC_LOAD_MAX:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_MAX_I8,
ARM::ATOMIC_LOAD_MAX_I16,
ARM::ATOMIC_LOAD_MAX_I32,
ARM::ATOMIC_LOAD_MAX_I64);
case ISD::ATOMIC_LOAD_UMIN:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_UMIN_I8,
ARM::ATOMIC_LOAD_UMIN_I16,
ARM::ATOMIC_LOAD_UMIN_I32,
ARM::ATOMIC_LOAD_UMIN_I64);
case ISD::ATOMIC_LOAD_UMAX:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_UMAX_I8,
ARM::ATOMIC_LOAD_UMAX_I16,
ARM::ATOMIC_LOAD_UMAX_I32,
ARM::ATOMIC_LOAD_UMAX_I64);
case ISD::ATOMIC_SWAP:
return SelectAtomic(N,
ARM::ATOMIC_SWAP_I8,
ARM::ATOMIC_SWAP_I16,
ARM::ATOMIC_SWAP_I32,
ARM::ATOMIC_SWAP_I64);
case ISD::ATOMIC_CMP_SWAP:
return SelectAtomic(N,
ARM::ATOMIC_CMP_SWAP_I8,
ARM::ATOMIC_CMP_SWAP_I16,
ARM::ATOMIC_CMP_SWAP_I32,
ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
}
SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) {
std::vector<SDValue> AsmNodeOperands;
unsigned Flag, Kind;
bool Changed = false;
unsigned NumOps = N->getNumOperands();
// Normally, i64 data is bound to two arbitrary GPRs for an "r" constraint.
// However, some instructions (e.g. ldrexd/strexd in ARM mode) require
// (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
// respectively. Since there is no constraint to explicitly specify a
// reg pair, we use the GPRPair reg class for "r" with 64-bit data. For
// Thumb, the 64-bit data may be referred to by the H, Q, R modifiers, so
// we still pack them into a GPRPair.
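// For example (an illustrative user-level snippet, not from this file):
//   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(v64) : "r"(addr));
// requires %0/%H0 to be an even/odd register pair, which only a GPRPair
// class operand can guarantee.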
SDLoc dl(N);
SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0);
SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
SDValue op = N->getOperand(i);
AsmNodeOperands.push_back(op);
if (i < InlineAsm::Op_FirstOperand)
continue;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
Flag = C->getZExtValue();
Kind = InlineAsm::getKind(Flag);
}
else
continue;
// Immediate operands to inline asm in the SelectionDAG are modeled with
// two operands. The first is a constant of value InlineAsm::Kind_Imm, and
// the second is a constant with the value of the immediate. If we get here
// and we have a Kind_Imm, skip the next operand, and continue.
if (Kind == InlineAsm::Kind_Imm) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}
unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
if (NumRegs)
OpChanged.push_back(false);
unsigned DefIdx = 0;
bool IsTiedToChangedOp = false;
// If it's a use that is tied to a previous def, it has no
// reg class constraint of its own.
if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
IsTiedToChangedOp = OpChanged[DefIdx];
if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
&& Kind != InlineAsm::Kind_RegDefEarlyClobber)
continue;
unsigned RC;
bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
|| NumRegs != 2)
continue;
assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
SDValue V0 = N->getOperand(i+1);
SDValue V1 = N->getOperand(i+2);
unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();
if (Kind == InlineAsm::Kind_RegDef ||
Kind == InlineAsm::Kind_RegDefEarlyClobber) {
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.
unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
SDValue Chain = SDValue(N,0);
SDNode *GU = N->getGluedUser();
SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
Chain.getValue(1));
// Extract values from a GPRPair reg and copy to the original GPR reg.
SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
RegCopy);
SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
RegCopy);
SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
RegCopy.getValue(1));
SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
// Update the original glue user.
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
GU = T1.getNode();
}
else {
// For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
// GPRPair and then pass the GPRPair to the inline asm.
SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
// As REG_SEQ doesn't take RegisterSDNode, we copy them first.
SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
Chain.getValue(1));
SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
T0.getValue(1));
SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
// Copy REG_SEQ into a GPRPair-typed VR and replace the original two
// i32 VRs of inline asm with it.
unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
Glue = Chain.getValue(1);
}
Changed = true;
if (PairedReg.getNode()) {
OpChanged[OpChanged.size() - 1] = true;
Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
if (IsTiedToChangedOp)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
else
Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
Flag, MVT::i32);
// Add the new register node and skip the original two GPRs.
AsmNodeOperands.push_back(PairedReg);
// Skip the next two GPRs.
i += 2;
}
}
if (Glue.getNode())
AsmNodeOperands.push_back(Glue);
if (!Changed)
return NULL;
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
AsmNodeOperands.size());
New->setNodeId(-1);
return New.getNode();
}
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) {
assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
// Require the address to be in a register. That is safe for all ARM
// variants and it is hard to do anything much smarter without knowing
// how the operand is used.
OutOps.push_back(Op);
return false;
}
/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
Index: head/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- head/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td (revision 265924)
+++ head/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td (revision 265925)
@@ -1,7106 +1,7110 @@
//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
def nModImm : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
}
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
let PrintMethod = "printFPImmOperand";
let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmSplatI64AsmOperand;
}
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 8;
}]> {
let ParserMatchClass = VectorIndex8Operand;
let PrintMethod = "printVectorIndex";
let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 4;
}]> {
let ParserMatchClass = VectorIndex16Operand;
let PrintMethod = "printVectorIndex";
let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 2;
}]> {
let ParserMatchClass = VectorIndex32Operand;
let PrintMethod = "printVectorIndex";
let MIOperandInfo = (ops i32imm);
}
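// These operands model the bracketed lane subscript in NEON assembly, e.g.
// the "[3]" in "vdup.8 d0, d1[3]" is a byte lane index (VectorIndex8).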
// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
let Name = "VecListOneD";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
let Name = "VecListDPair";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
let Name = "VecListThreeD";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
let Name = "VecListFourD";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
let Name = "VecListDPairSpaced";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
let Name = "VecListThreeQ";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
let Name = "VecListFourQ";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
let ParserMatchClass = VecListFourQAsmOperand;
}
// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListOneDAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListDPairAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
"printVectorListTwoAllLanes"> {
let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListDPairSpacedAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
"printVectorListTwoSpacedAllLanes"> {
let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListThreeDAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
"printVectorListThreeAllLanes"> {
let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListThreeQAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
"printVectorListThreeSpacedAllLanes"> {
let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListFourDAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListFourQAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
"printVectorListFourSpacedAllLanes"> {
let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
let Name = "VecListOneDByteIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
let ParserMatchClass = VecListOneDByteIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListOneDHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListOneDWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
let ParserMatchClass = VecListOneDWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
let Name = "VecListTwoDByteIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListTwoDHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListTwoDWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListTwoQHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListTwoQWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
let Name = "VecListThreeDByteIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListThreeDHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListThreeDWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListThreeQHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListThreeQWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
let Name = "VecListFourDByteIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
let ParserMatchClass = VecListFourDByteIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListFourDHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListFourDWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
let ParserMatchClass = VecListFourDWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListFourQHWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
let Name = "VecListFourQWordIndexed";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
let ParserMatchClass = VecListFourQWordIndexAsmOperand;
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
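// These fragments are used by patterns later in this file to gate vector
// load/store selection on the statically known alignment of the access.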
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
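// For example, "vshll.s16 q0, d0, #4" is an "SHX" long shift: the v4i16
// source widens to a v4i32 result.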
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
def NEONvbsl : SDNode<"ARMISD::VBSL",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>>;
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisVT<2, i32>]>>;
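// For example, "vdup.32 q0, d1[0]" replicates one 32-bit lane of a D
// register across all four lanes of a Q register.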
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
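// The two leaves above recognize the VMOV modified-immediate encodings of
// the all-zeros and all-ones vectors, so patterns can match these common
// constants without decoding the immediate again.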
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
: PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
[(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
: PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
[(store (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr), itin,
"$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr), itin,
"$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb, $src = $dst">;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x2,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
// ...with address register writeback:
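// In the NEON load/store encoding, Rm == 0b1111 selects the plain form,
// Rm == 0b1101 selects writeback by the transfer size (the "_fixed"
// variants below), and any other Rm selects register-offset writeback.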
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
(ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
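// Illustrative syntax: "vld2.8 {d0, d1}, [r0]" loads interleaved element
// pairs and de-interleaves them into the two registers; the q-forms use
// four consecutive D registers, e.g. "vld2.16 {d0, d1, d2, d3}, [r0]".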
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
RegisterOperand VdTy, InstrItinClass itin> {
def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
}
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
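// Illustrative syntax for the double-spaced form: "vld2.16 {d0, d2}, [r0]".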
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$Rn), IIC_VLD3,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
// ...with double-spaced registers:
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
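// Illustrative syntax: "vld3.8 {d0, d1, d2}, [r0]"; the double-spaced (q)
// forms use every other register, e.g. "vld3.16 {d0, d2, d4}, [r0]".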
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$Rn), IIC_VLD4,
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
// ...with double-spaced registers:
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
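// Illustrative syntax: "vld4.8 {d0, d1, d2, d3}, [r0]"; the double-spaced
// (q) forms use every other register, e.g. "vld4.16 {d0, d2, d4, d6}, [r0]".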
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst),
(ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst),
(ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst),
(ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag LoadOp>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
(ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
"$src = $Vd",
[(set DPR:$Vd, (vector_insert (Ty DPR:$src),
(i32 (LoadOp addrmode6:$Rn)),
imm:$lane))]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD1LN";
}
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag LoadOp>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
(ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
"$src = $Vd",
[(set DPR:$Vd, (vector_insert (Ty DPR:$src),
(i32 (LoadOp addrmode6oneL32:$Rn)),
imm:$lane))]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
(i32 (LoadOp addrmode6:$addr)),
imm:$lane))];
}
def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
let Inst{7-6} = lane{1-0};
let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
def : Pat<(vector_insert (v2f32 DPR:$src),
(f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
(f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
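// Illustrative syntax: "vld1.32 {d0[1]}, [r0]" loads a single element into
// lane 1 of d0, leaving the other lanes unchanged (hence the "$src = $Vd"
// tie-constraint above).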
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$src = $Vd, $Rn.addr = $wb", []> {
let DecoderMethod = "DecodeVLD1LN";
}
def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{4};
let Inst{4} = Rn{4};
}
def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2LN";
}
def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
"\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2LN";
}
def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
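// Illustrative syntax: "vld2.16 {d0[2], d1[2]}, [r0]"; the double-spaced (q)
// forms use every other register, e.g. "vld2.32 {d0[1], d2[1]}, [r0]".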
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD3LN";
}
def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VLD3lnu, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
[]> {
let DecoderMethod = "DecodeVLD3LN";
}
def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
}
def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
[]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN" ;
}
def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
(ins addrmode6dup:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
(Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
"vld1", Dt, "$Vd, $Rn", "",
[(set VecListDPairAllLanes:$Vd,
(Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
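// Illustrative syntax: "vld1.32 {d0[]}, [r0]" replicates the loaded element
// to all lanes of d0; "vld1.32 {d0[], d1[]}, [r0]" fills a whole Q register.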
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
}
defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
: NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD2dup,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
// ...with double-spaced registers
def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
def _register : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
}
defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
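// Illustrative syntax: "vld2.8 {d0[], d1[]}, [r0]"; the double-spaced forms
// use every other register, e.g. "vld2.16 {d0[], d2[]}, [r0]".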
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6dup:$Rn), IIC_VLD3dup,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1111, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6dup:$Rn), IIC_VLD4dup,
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4DupInstruction";
}
def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1111, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4DupInstruction";
}
def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
"$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, QPR:$src), itin,
"$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
"$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, QQPR:$src), itin,
"$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
class VSTQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb">;
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def VST1d8 : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;
def VST1q8 : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;
// ...with address register writeback:
multiclass VST1DWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
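// Illustrative syntax: "vst1.8 {d0}, [r0]"; with writeback,
// "vst1.8 {d0}, [r0]!" selects the _fixed variant and
// "vst1.8 {d0}, [r0], r1" the _register variant.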
// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins addrmode6:$Rn, VecListThreeD:$Vd),
IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
def VST1d8T : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;
defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
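// Note the asymmetry above: the _fixed pseudo uses VSTQQWBfixedPseudo, whose
// input list carries no am6offset operand (the post-increment is implied by
// the "!" form), while the _register pseudo keeps the generic writeback
// class, which does take an offset operand.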
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
(ins addrmode6:$Rn, VecListFourD:$Vd),
IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
InstrItinClass itin>
: NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
RegisterOperand VdTy> {
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
}
defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
// ...with double-spaced registers:
def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
// ...with double-spaced registers:
def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNPseudo<IIC_VST1ln> {
let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr)];
}
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{4};
}
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
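// Illustrative syntax: "vst1.32 {d0[1]}, [r0]" stores the single element in
// lane 1 of d0.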
// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AdrMode:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
AdrMode:$Rn, am6offset:$Rm))]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNWBPseudo<IIC_VST1lnu> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr, am6offset:$offset))];
}
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
extractelt, addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
"", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
let Inst{4} = Rn{4};
}
def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
"\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
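// Illustrative syntax: "vst2.16 {d0[2], d1[2]}, [r0]"; the double-spaced (q)
// forms use every other register, e.g. "vst2.32 {d0[1], d2[1]}, [r0]".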
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST3LN";
}
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VST3lnu, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let DecoderMethod = "DecodeVST3LN";
}
def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
"", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST4LN";
}
def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
IIC_VST4lnu, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST4LN";
}
def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// Use vld1/vst1 for unaligned f64 load / store
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
(VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
(VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
(VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
(VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
(VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
(VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
(VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
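// The byte/halfword-element patterns above are little-endian only,
// presumably because on big-endian targets the per-element byte ordering of
// vld1.8/vld1.16 would not match the in-register layout of an f64/v2f64;
// big-endian targets fall back to the 64-bit-element forms instead.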
//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//
// Extract D sub-registers of Q registers.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;
// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;
// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;
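// Worked example of the mapping above: for a v8i16 value in a Q register,
// lane 5 maps to D subregister dsub_(5/4) = dsub_1, with lane (5 & 3) = 1
// within that D register.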
//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//
// Basic 2-register operations: double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as above, but not predicated.
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as N2VQIntXnp but with Vd as a src register.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
}
// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyD, ValueType TyQ, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
// Long 2-register operations (currently only used for VMOVL).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
(ins DPR:$src1, DPR:$src2), IIC_VPERMD,
OpcodeStr, Dt, "$Vd, $Vm",
"$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
InstrItinClass itin, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
(ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
"$src1 = $Vd, $src2 = $Vm", []>;
// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
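// For illustration, a hypothetical N3VD instantiation (placeholder name and
// unverified opcode bits) for a floating-point add would be:
//   def VADDfdX : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND,
//                      "vadd", "f32", v2f32, v2f32, fadd, 1>;
// The trailing 1 marks the operation commutable, letting instruction
// selection swap $Vn and $Vm.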
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm), imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, Format f, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
let TwoOperandAliasConstraint = "$Vm = $Vd";
let isCommutable = 0;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, Format f, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, Format f, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
Dt, ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
(OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
let TwoOperandAliasConstraint = "$Vm = $Vd";
let isCommutable = 0;
}
// Multiply-Add/Sub operations: double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd, (Ty (OpNode DPR:$src1,
(Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
(Ty (NEONvduplane (Ty DPR_8:$Vm),
imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
SDPatternOperator MulOp, SDPatternOperator OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (OpNode QPR:$src1,
(Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
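// For illustration, a hypothetical multiply-accumulate instantiation of
// N3VQMulOp (placeholder name and unverified opcode bits):
//   def VMLAqX : N3VQMulOp<0, 0, 0b00, 0b1001, 0, IIC_VMACi16Q,
//                          "vmla", "i8", v16i8, mul, add>;
// matching (add $src1, (mul $Vn, $Vm)) with $src1 tied to $Vd.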
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))))]>;
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd, (Ty (OpNode DPR:$src1,
(Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (OpNode QPR:$src1,
(Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
(OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
// Long Multiply-Add/Sub operations.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
(TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_8:$Vm),
imm:$lane))))))]>;
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
(TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
(TyD DPR:$Vm)))))))]>;
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]>;
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
let isCommutable = Commutable;
}
// Long 3-register operations.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_VFP2:$Vm), imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
// Long 3-register operations with explicitly extended operands.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
(TyQ (ExtOp (TyD DPR:$Vm)))))]> {
let isCommutable = Commutable;
}
// Long 3-register intrinsics with explicit extend (VABDL).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
(TyD DPR:$Vm))))))]> {
let isCommutable = Commutable;
}
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]>;
// Wide 3-register operations.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
(TyQ (ExtOp (TyD DPR:$Vm)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
[(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
[(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, InstrItinClass itin, Operand ImmTy,
string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, InstrItinClass itin, Operand ImmTy,
string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
(outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
(i32 imm:$SIMM))))]>;
// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set DPR:$Vd, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}
// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, Format f, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, Format f, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}
// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
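// For illustration, a hypothetical fixed-point convert instantiation of
// N2VCvtD (placeholder name, unverified opcode bits; int_arm_neon_vcvtfp2fxs
// is assumed to be the float-to-fixed intrinsic):
//   def VCVTf2xsdX : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
//                            v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
// The fractional-bits operand $SIMM comes from the neon_vcvt_imm32 operand
// hardcoded in the class.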
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
// Abbreviations used in multiclass suffixes:
// Q = quarter int (8 bit) elements
// H = half int (16 bit) elements
// S = single int (32 bit) elements
// D = double int (64 bit) elements
// Neon 2-register vector operations and intrinsics.
// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt,
string asm, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "",
[(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "",
[(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "",
[(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f32", asm, "",
[(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "",
[(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "",
[(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "",
[(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f32", asm, "",
[(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
}
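// For illustration, a comparison multiclass like N2V_QHS_cmp is instantiated
// once per condition; a hypothetical compare-with-zero (placeholder name,
// unverified opcode bits, NEONvceqz assumed as the zero-compare selection
// node) might read:
//   defm VCEQzX : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
//                             "$Vd, $Vm, #0", NEONvceqz>;
// The asm string carries a literal "#0" since the zero operand is implicit.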
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
// 128-bit vector types.
def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}
// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, OpNode>;
def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "32"),
v4i16, v4i32, OpNode>;
def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "64"),
v2i32, v2i64, OpNode>;
}
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
SDPatternOperator IntOp> {
def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp>;
def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "32"),
v4i16, v4i32, IntOp>;
def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "64"),
v2i32, v2i64, IntOp>;
}
// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDNode OpNode, bit Commutable = 0> {
// 64-bit vector types.
def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, OpNode, Commutable>;
def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, OpNode, Commutable>;
def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, OpNode, Commutable>;
// 128-bit vector types.
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, OpNode, Commutable>;
def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, OpNode, Commutable>;
def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, OpNode, Commutable>;
}
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
// ....then also with element size of 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
string OpcodeStr, string Dt,
SDNode OpNode, bit Commutable = 0>
: N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
OpcodeStr, Dt, OpNode, Commutable> {
def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "64"),
v1i64, v1i64, OpNode, Commutable>;
def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, OpNode, Commutable>;
}
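// For illustration, N3V_QHSD is instantiated once per mnemonic with defm,
// expanding into one record per element size. A hypothetical integer-add
// instantiation (placeholder name, unverified bits and itineraries):
//   defm VADDx : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
//                         "vadd", "i", add, 1>;
// would define VADDxv8i8 through VADDxv2i64, all marked commutable.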
// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp, bit Commutable = 0> {
// 64-bit vector types.
def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, IntOp, Commutable>;
def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, IntOp, Commutable>;
// 128-bit vector types.
def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, IntOp, Commutable>;
def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp> {
// 64-bit vector types.
def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, IntOp>;
def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, IntOp>;
def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp>;
}
multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp, bit Commutable = 0>
: N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, IntOp, Commutable>;
def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp>
: N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp> {
def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, IntOp>;
def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp>;
}
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp, bit Commutable = 0>
: N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "64"),
v1i64, v1i64, IntOp, Commutable>;
def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp>
: N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp> {
def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "64"),
v1i64, v1i64, IntOp>;
def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, IntOp>;
}
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt,
SDPatternOperator IntOp, bit Commutable = 0> {
def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp, Commutable>;
def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
OpcodeStr, !strconcat(Dt, "32"),
v4i16, v4i32, IntOp, Commutable>;
def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
OpcodeStr, !strconcat(Dt, "64"),
v2i32, v2i64, IntOp, Commutable>;
}
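// For illustration, a hypothetical narrowing instantiation (placeholder name,
// unverified bits, int_arm_neon_vaddhn assumed as the intrinsic):
//   defm VADDHNx : N3VNInt_HSD<0, 1, 0b0100, 0, "vaddhn", "i",
//                              int_arm_neon_vaddhn, 1>;
// Each resulting record takes two Q-register sources and writes a D-register
// result half the element width.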
// Neon Long 3-register vector operations.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
SDNode OpNode, bit Commutable = 0> {
def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, OpNode, Commutable>;
def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, OpNode, Commutable>;
def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, OpNode, Commutable>;
}
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
!strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
!strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, OpNode, ExtOp, Commutable>;
def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, OpNode, ExtOp, Commutable>;
def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
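// For illustration, a hypothetical long-add instantiation of N3VLExt_QHS
// (placeholder name, unverified bits and itineraries); the extension operator
// is what distinguishes the signed and unsigned variants:
//   defm VADDLsX : N3VLExt_QHS<0, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
//                              "vaddl", "s", add, sext, 1>;
// An unsigned "u" variant would pass zext in place of sext.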
// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp, bit Commutable = 0> {
def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, IntOp, Commutable>;
}
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
SDPatternOperator IntOp> {
def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
SDPatternOperator IntOp, bit Commutable = 0>
: N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
IntOp, Commutable> {
def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, Commutable>;
}
// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, ExtOp, Commutable>;
def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, ExtOp, Commutable>;
def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, IntOp, ExtOp, Commutable>;
}
// Neon Wide 3-register vector operations,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt,
SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, OpNode, ExtOp, Commutable>;
def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, OpNode, ExtOp, Commutable>;
def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
// 128-bit vector types.
def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDNode ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
mul, ShOp>;
def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
mul, ShOp>;
}
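// For illustration, a hypothetical scalar (by-lane) multiply-accumulate
// instantiation (placeholder name; the itineraries are assumed):
//   defm VMLAslX : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
//                                IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i",
//                                add>;
// mul is fixed inside the multiclass; only the accumulate operator varies.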
// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
string OpcodeStr, string Dt, SDPatternOperator IntOp,
SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
// 128-bit vector types.
def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt, SDNode MulOp,
SDNode OpNode> {
def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
!strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
!strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
!strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
string Dt, SDNode MulOp, SDNode OpNode> {
def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
!strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
!strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt, SDPatternOperator IntOp>
: N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
IntOp, ExtOp, OpNode>;
def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
IntOp, ExtOp, OpNode>;
def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
IntOp, ExtOp, OpNode>;
}
// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon 2-register vector shift by immediate,
// with the format f being either N2RegVShLFrm or N2RegVShRFrm, for
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
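// A worked example of the imm6 encoding split above (informative only):
// for the left-shift forms, imm6 = esize + shift, so the leading bits of
// imm6 select the element size and the trailing bits hold the shift:
//   vshl.i8  d0, d1, #3  ->  imm6 = 8  + 3 = 0b001011  (001xxx)
//   vshl.i16 d0, d1, #5  ->  imm6 = 16 + 5 = 0b010101  (01xxxx)
//   vshl.i32 d0, d1, #5  ->  imm6 = 32 + 5 = 0b100101  (1xxxxx)
// For the right-shift forms below the assembler instead encodes
// imm6 = 2 * esize - shift, e.g. vshr.s32 with #5 -> imm6 = 0b111011.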
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
string baseOpc, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, SDNode ShOp> {
// 64-bit vector types.
def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
// imm6 = xxxxxx
}
// Neon Shift-Insert vector operations,
// with the format f being either N2RegVShLFrm or N2RegVShRFrm, for
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
// imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
// imm6 = xxxxxx
}
// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, shr_imm8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
OpcodeStr, !strconcat(Dt, "32"),
v4i16, v4i32, shr_imm16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
OpcodeStr, !strconcat(Dt, "64"),
v2i32, v2i64, shr_imm32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//
// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;
def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
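// The three patterns above let generic IR of the form trunc((a + b) >> h)
// select VADDHN directly. A minimal C sketch of the equivalent intrinsic
// usage (illustrative only; assumes <arm_neon.h>):
//   uint8x8_t addhn(uint16x8_t a, uint16x8_t b) {
//     return vaddhn_u16(a, b);   // == (uint8x8_t)((a + b) >> 8)
//   }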
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
"p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
"p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
(v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
(v4f32 (VMULslfq (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfd DPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfq QPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
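// The lane patterns above handle by-lane multiplies of Q registers by
// extracting the D subregister that contains the lane and using the
// D-register lane-multiply form. Hedged C sketch of the source idiom
// that typically reaches these patterns (assumes <arm_neon.h>):
//   float32x4_t mul_lane1(float32x4_t a, float32x2_t v) {
//     return vmulq_lane_f32(a, v, 1);   // a * v[1] in every element
//   }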
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2),
imm:$lane)))),
(v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
"vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2),
imm:$lane)))),
(v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
DecoderNamespace = "NEONData" in {
defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
"vmull", "s", NEONvmulls, 1>;
defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
"vmull", "u", NEONvmullu, 1>;
def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
"vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
"vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
"vqdmull", "s", int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
v4f32, v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
(v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
(v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (add (v4i32 QPR:$src1),
(mul (v4i32 QPR:$src2),
(v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
(v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
(fmul_su (v4f32 QPR:$src2),
(v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLAslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>,
Requires<[HasNEON, UseFPVMLx]>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
"vmlal", "s", NEONvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
"vmlal", "u", NEONvmullu, add>;
defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
(VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 DPR:$Vm))))),
(VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
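// As the patterns above encode, a saturating add of a vqdmull result is
// folded into a single VQDMLAL. Minimal C sketch (illustrative only;
// assumes <arm_neon.h>):
//   int32x4_t qdmac(int32x4_t acc, int16x4_t a, int16x4_t b) {
//     // same result as vqaddq_s32(acc, vqdmull_s16(a, b))
//     return vqdmlal_s16(acc, a, b);
//   }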
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
v4f32, v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
(v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
(v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
(mul (v4i32 QPR:$src2),
(v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
(v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
(fmul_su (v4f32 QPR:$src2),
(v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>,
Requires<[HasNEON, UseFPVMLx]>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
"vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
"vmlsl", "u", NEONvmullu, sub>;
defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
(VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 DPR:$Vm))))),
(VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
(VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
(VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
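// Note the operand rotation in the patterns above: ISD fma is (a, b, acc)
// while the instruction ties the accumulator to $Vd, giving
// (VFMAfd $src1, $Vn, $Vm). Hedged C sketch of a fused multiply-add that
// maps onto VFMA on VFPv4-capable targets (assumes <arm_neon.h>):
//   float32x4_t fmadd(float32x4_t acc, float32x4_t a, float32x4_t b) {
//     return vfmaq_f32(acc, a, b);   // acc + a * b with a single rounding
//   }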
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
"vsub", "i", sub, 0>;
def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;
def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
// Vector Comparisons.
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$Vd, $Vm, #0", NEONvceqz>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$Vd, $Vm, #0", NEONvclez>;
}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$Vd, $Vm, #0", NEONvcltz>;
}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
"f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
"f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
"f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
(VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
(VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
(VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
(VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
(VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
(VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
(VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
(VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
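// There is no distinct VACLT/VACLE encoding; the aliases above simply
// swap the source operands of VACGT/VACGE, so that, for example,
//   vaclt.f32 d0, d1, d2  ==  vacgt.f32 d0, d2, d1
// i.e. |d1| < |d2| is assembled as |d2| > |d1|.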
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
(xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
(xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
v2i32, v2i32, and, 1>;
def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
v4i32, v4i32, and, 1>;
// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
v4i32, v4i32, xor, 1>;
// VORR : Vector Bitwise OR
def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
v2i32, v2i32, or, 1>;
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
(outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
(v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
(outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
(outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
(v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
(outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
(v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
"vbic", "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (v2i32 (and DPR:$Vn,
(vnotd DPR:$Vm))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
"vbic", "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (v4i32 (and QPR:$Vn,
(vnotq QPR:$Vm))))]>;
}
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
(outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
(v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
(outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
(outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
(v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
(outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
(v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
"vorn", "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (v2i32 (or DPR:$Vn,
(vnotd DPR:$Vm))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
"vorn", "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (v4i32 (or QPR:$Vn,
(vnotq QPR:$Vm))))]>;
// VMVN : Vector Bitwise NOT (Immediate)
let isReMaterializable = 1 in {
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
}
// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
(outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
"vmvn", "$Vd, $Vm", "",
[(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
"vmvn", "$Vd, $Vm", "",
[(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VCNTiD,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd,
(v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
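// VBSL computes (mask & $Vn) | (~mask & $Vm) with the mask tied to $Vd,
// which is why the or/and patterns above require the mask in $Vd.
// Minimal C sketch (illustrative only; assumes <arm_neon.h>):
//   uint32x4_t bitsel(uint32x4_t mask, uint32x4_t n, uint32x4_t m) {
//     return vbslq_u32(mask, n, m);   // per-bit: mask ? n : m
//   }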
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
"vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
"vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
Requires<[HasV8, HasNEON]>;
def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
Requires<[HasV8, HasNEON]>;
}
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
v2f32, v2f32, int_arm_neon_vmins, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
v4f32, v4f32, int_arm_neon_vmins, 1>;
// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
v2f32, v2f32, int_arm_neon_vminnm, 1>,
Requires<[HasV8, HasNEON]>;
def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
v4f32, v4f32, int_arm_neon_vminnm, 1>,
Requires<[HasV8, HasNEON]>;
}
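// VMAXNM/VMINNM implement the IEEE 754-2008 maxNum/minNum rules: given
// one quiet NaN operand, the numeric operand is returned, unlike
// VMAX/VMIN.f32 which propagate the NaN. Hedged sketch, assuming an
// ARMv8 target where ACLE provides the intrinsic:
//   float32x4_t maxnum(float32x4_t a, float32x4_t b) {
//     return vmaxnmq_f32(a, b);   // NaN-suppressing maximum
//   }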
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
"vpadd", "i8",
v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
"vpadd", "i16",
v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
"vpadd", "i32",
v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
IIC_VPBIND, "vpadd", "f32",
v2f32, v2f32, int_arm_neon_vpadd, 0>;
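// VPADD adds adjacent element pairs across the concatenation of its two
// inputs. Minimal C sketch (illustrative only; assumes <arm_neon.h>):
//   int16x4_t pairsum(int16x4_t a, int16x4_t b) {
//     return vpadd_s16(a, b);   // { a0+a1, a2+a3, b0+b1, b2+b3 }
//   }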
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
int_arm_neon_vpaddlu>;
// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
IIC_VUNAD, "vrecpe", "u32",
v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
IIC_VUNAQ, "vrecpe", "u32",
v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
IIC_VUNAD, "vrecpe", "f32",
v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
IIC_VUNAQ, "vrecpe", "f32",
v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSD, "vrecps", "f32",
v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrecps", "f32",
v4f32, v4f32, int_arm_neon_vrecps, 1>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
IIC_VUNAD, "vrsqrte", "u32",
v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
IIC_VUNAQ, "vrsqrte", "u32",
v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAD, "vrsqrte", "f32",
v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSD, "vrsqrts", "f32",
v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrsqrts", "f32",
v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
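// VRECPS/VRSQRTS are the step operations for Newton-Raphson refinement
// of the VRECPE/VRSQRTE estimates: vrecps(a, b) = 2 - a*b and
// vrsqrts(a, b) = (3 - a*b) / 2. Hedged C sketch of the usual two-step
// 1/sqrt(d) refinement (assumes <arm_neon.h>):
//   float32x2_t rsqrt_nr(float32x2_t d) {
//     float32x2_t x = vrsqrte_f32(d);                   // initial estimate
//     x = vmul_f32(x, vrsqrts_f32(vmul_f32(d, x), x));  // NR step 1
//     x = vmul_f32(x, vrsqrts_f32(vmul_f32(d, x), x));  // NR step 2
//     return x;
//   }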
// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
"vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
"vshl", "u", int_arm_neon_vshiftu>;
// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
NEONvshrs>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
NEONvshru>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, imm32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
NEONvshrn>;
// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
NEONvrshrs>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
NEONvqshrnu>;
// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqrshl", "u", int_arm_neon_vqrshiftu>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
NEONvqrshrnu>;
// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
NEONvqrshrnsu>;
// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
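// VSLI/VSRI shift one operand and insert the result into the
// destination, preserving the destination bits the shift leaves
// uncovered. Minimal C sketch (illustrative only; assumes <arm_neon.h>):
//   uint32x2_t shl_insert(uint32x2_t dst, uint32x2_t src) {
//     return vsli_n_u32(dst, src, 8);  // (src << 8) | (dst & 0xff)
//   }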
// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v2f32, v2f32, fabs>;
def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
(v2i32 (bitconvert (v8i8 (add DPR:$src,
(NEONvshrs DPR:$src, (i32 7))))))),
(VABSv8i8 DPR:$src)>;
def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
(v2i32 (bitconvert (v4i16 (add DPR:$src,
(NEONvshrs DPR:$src, (i32 15))))))),
(VABSv4i16 DPR:$src)>;
def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
(v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
(VABSv2i32 DPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
(v4i32 (bitconvert (v16i8 (add QPR:$src,
(NEONvshrs QPR:$src, (i32 7))))))),
(VABSv16i8 QPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
(v4i32 (bitconvert (v8i16 (add QPR:$src,
(NEONvshrs QPR:$src, (i32 15))))))),
(VABSv8i16 QPR:$src)>;
def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
(v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
(VABSv4i32 QPR:$src)>;
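// The xor/add patterns above recognize the classic branchless absolute
// value: with s = x >> (bits-1) (arithmetic shift), |x| == (x + s) ^ s.
// Scalar C sketch of the identity being matched (illustrative only):
//   int32_t abs32(int32_t x) {
//     int32_t s = x >> 31;   // 0 for x >= 0, -1 for x < 0
//     return (x + s) ^ s;    // |x| (INT32_MIN overflows, as in the IR)
//   }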
def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
int_arm_neon_vqabs>;
// Vector Negate.
def vnegd : PatFrag<(ops node:$in),
(sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq : PatFrag<(ops node:$in),
(sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
// VNEG : Vector Negate (integer)
def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
"vneg", "f32", "$Vd, $Vm", "",
[(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
"vneg", "f32", "$Vd, $Vm", "",
[(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
int_arm_neon_vqneg>;
// Vector Bit Counting Operations.
// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
ctlz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiD, "vcnt", "8",
v8i8, v8i8, ctpop>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, ctpop>;
// Vector Swap
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
(outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
[]>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
(outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
[]>;
// Vector Move Operations.
// VMOV : Vector Move (Register)
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
(outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
(outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
(outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
(outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
(outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]>,
Requires<[HasNEON, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
(VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
(VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
(VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
(VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane))>,
Requires<[HasNEON, HasFastVGETLNi32]>;
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
(COPY_TO_REGCLASS
(i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(COPY_TO_REGCLASS
(i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
Requires<[HasNEON, HasSlowVGETLNi32]>;
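// On slow-VGETLN cores the two patterns above sidestep "vmov.32 rN, dM[x]":
// each 32-bit lane of a D register in d0-d15 aliases an S register
// (d0[1] is s1, for instance), so the element can be copied out with a
// plain VFP register move instead.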
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
(EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
// VMOV : Vector Set Lane (move ARM core register to scalar)
let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
(ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
(ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
(ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
[(set DPR:$V, (insertelt (v2i32 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
}
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
(v16i8 (INSERT_SUBREG QPR:$src1,
(v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
(DSubReg_i8_reg imm:$lane))),
GPR:$src2, (SubReg_i8_lane imm:$lane))),
(DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
(v8i16 (INSERT_SUBREG QPR:$src1,
(v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
(DSubReg_i16_reg imm:$lane))),
GPR:$src2, (SubReg_i16_lane imm:$lane))),
(DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
(v4i32 (INSERT_SUBREG QPR:$src1,
(v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
(DSubReg_i32_reg imm:$lane))),
GPR:$src2, (SubReg_i32_lane imm:$lane))),
(DSubReg_i32_reg imm:$lane)))>;
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
(INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
(VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
(VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
dsub_0)>;
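// For example, (v2i32 (scalar_to_vector GPR:$src)) becomes a single
// "vmov.32 d0[0], r0" (registers illustrative) into an undef D register;
// the remaining lanes stay undefined, which scalar_to_vector permits.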
// VDUP : Vector Duplicate (from ARM core register to all elements)
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
IIC_VMOVIS, "vdup", Dt, "$V, $R",
[(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
IIC_VMOVIS, "vdup", Dt, "$V, $R",
[(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
Requires<[HasNEON,HasSlowVDUP32]>;
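// Sketch: on a slow-VDUP.32 core a v2i32 splat of r0 is emitted as
//   vmov d0, r0, r0
// writing the same core register into both halves of the D register with
// one VMOVDRR instead of the slower vdup.32.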
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType Ty, Operand IdxTy>
: NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
[(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Operand IdxTy>
: NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
[(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
VectorIndex32:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
bits<3> lane;
let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
bits<2> lane;
let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
bits<1> lane;
let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
bits<3> lane;
let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
bits<2> lane;
let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
bits<1> lane;
let Inst{19} = lane{0};
}
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32d DPR:$Vm, imm:$lane)>;
def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32q DPR:$Vm, imm:$lane)>;
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
(v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
(v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
[(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
"vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
"vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
"vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
"vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
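// The anyext patterns above reuse the zero-extending VMOVL forms: the high
// bits of an any-extend are unspecified, so e.g. (v8i16 (anyext v8i8)) may
// legally be implemented as "vmovl.u8 qN, dM".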
// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v2f32, v2i32, uint_to_fp>;
def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT{A, N, P, M}
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
SDPatternOperator IntU> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
}
}
defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}
let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
(VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
(VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
(VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
(VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
(VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
(VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
(VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
(VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
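// For example, the fixed-point spelling with zero fractional bits
//   vcvt.s32.f32 d0, d1, #0
// is accepted and encoded as the plain integer conversion
//   vcvt.s32.f32 d0, d1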
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
IIC_VUNAQ, "vcvt", "f16.f32",
v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
IIC_VUNAQ, "vcvt", "f32.f16",
v4f32, v4i16, int_arm_neon_vcvthf2fp>,
Requires<[HasNEON, HasFP16]>;
// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VMOVD,
OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VMOVQ,
OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
// VREV32 : Vector Reverse elements within 32-bit words
class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VMOVD,
OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VMOVQ,
OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
// VREV16 : Vector Reverse elements within 16-bit halfwords
class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VMOVD,
OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VMOVQ,
OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
// Other Vector Shuffles.
// Aligned extractions: really just dropping registers
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
: Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
(EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
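// For example, taking the low v2i32 half of a v4i32 value in q0 is a pure
// subregister read of d0, and taking the high half (start index 2) reads
// d1; no instruction is emitted.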
// VEXT : Vector Extract
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
(Ty DPR:$Vm), imm:$index)))]> {
bits<3> index;
let Inst{11} = 0b0;
let Inst{10-8} = index{2-0};
}
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
(ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
(Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
}
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(v2f32 DPR:$Vm),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
let Inst{11} = index{0};
let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
(VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
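// Semantics reminder (registers illustrative):
//   vext.8 d0, d1, d2, #3
// yields bytes 3..7 of d1 followed by bytes 0..2 of d2, i.e. an
// eight-byte window into the concatenation Vn:Vm starting at $index.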
// VTRN : Vector Transpose
def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
// VUZP : Vector Unzip (Deinterleave)
def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
(VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
// VZIP : Vector Zip (Interleave)
def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
(VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
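// With only two 32-bit elements per D register, zip, unzip and transpose
// coincide, so both "vuzp.32 d0, d1" and "vzip.32 d0, d1" assemble to
// "vtrn.32 d0, d1" via these aliases.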
def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
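// Semantics sketch: "vtbl.8 d0, {d1}, d2" sets each byte d0[i] to
// d1[d2[i]] when the index d2[i] is in range and to 0 otherwise; the VTBX
// variants below instead leave out-of-range destination bytes unchanged.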
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
(ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
"vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
(ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
"vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
(ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
"vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
def VTBL3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
(ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
"vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
[(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
(ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
"vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
(ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
"vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
(ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
"vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
def VTBX3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
}
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
(!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}
defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
DecoderNamespace = "v8Crypto" in {
class AES<string op, bit op7, bit op6, SDPatternOperator Int>
: N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
!strconcat("aes", op), "8", v16i8, v16i8, Int>,
Requires<[HasV8, HasCrypto]>;
class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
: N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
!strconcat("aes", op), "8", v16i8, v16i8, Int>,
Requires<[HasV8, HasCrypto]>;
class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
SDPatternOperator Int>
: N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
!strconcat("sha", op), "32", v4i32, v4i32, Int>,
Requires<[HasV8, HasCrypto]>;
class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
SDPatternOperator Int>
: N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
!strconcat("sha", op), "32", v4i32, v4i32, Int>,
Requires<[HasV8, HasCrypto]>;
class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
: N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
!strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
Requires<[HasV8, HasCrypto]>;
}
def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
class N2VSPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a)),
(EXTRACT_SUBREG
(v2f32 (COPY_TO_REGCLASS (Inst
(INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
class N3VSPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
(EXTRACT_SUBREG
(v2f32 (COPY_TO_REGCLASS (Inst
(INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$a, ssub_0),
(INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
(EXTRACT_SUBREG
(v2f32 (COPY_TO_REGCLASS (Inst
(INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$acc, ssub_0),
(INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$a, ssub_0),
(INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
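// Sketch of the expansion for (f32 (fadd SPR:$a, SPR:$b)) via
// N3VSPat<fadd, VADDfd>: each S operand is inserted into ssub_0 of an
// undef v2f32 constrained to DPR_VFP2 (d0-d15, where S subregisters
// exist), a single vadd.f32 runs on the D registers, and the f32 result
// is read back out of ssub_0 of the destination.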
// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
(EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
Requires<[HasNEON, DontUseVMOVSR]>;
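// E.g. (f32 (bitconvert (i32 GPR:$a))) selects to "vmov dN, rA, rA" (both
// halves written with the same value) and the f32 result is then read
// from the low S subregister of dN.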
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// bit_convert
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
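// All of the bitconvert patterns above are free: the same D or Q register
// is simply reinterpreted at the new type, so no instruction is emitted.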
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
(f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
let AddedComplexity = 10 in {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
(!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
(!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
(!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
}
}
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
string InsnLanes, string InsnTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
}
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
//                          (VLD1LNd32 addrmode6oneL32:$addr,
//                                     (f64 (IMPLICIT_DEF)), (i32 0))),
//                      dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0))>;
}
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
// (EXTRACT_SUBREG (VMOVLuv4i32
// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)),
// dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
dsub_0)>;
}
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
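// Sketch of the resulting code for an extending v2i8 load (registers
// illustrative, instructions as in the patterns above):
//   vld1.16   {d0[0]}, [r0]   @ two i8 elements loaded as one 16-bit lane
//   vmovl.u8  q0, d0          @ v8i8  -> v8i16
//   vmovl.u16 q0, d0          @ low v4i16 -> v4i32
//   vmovl.u32 q0, d0          @ low v2i32 -> v2i64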
//===----------------------------------------------------------------------===//
// Assembler aliases
//
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
(VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
(VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
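// E.g. the legacy VFP mnemonic "fmdhr d0, r0" assembles as
// "vmov.32 d0[1], r0", and "fmdlr d0, r0" as "vmov.32 d0[0], r0".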
// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
(VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
(VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
(VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
(VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
(VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
(VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
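// E.g. the two-operand "vand q0, q1", with or without a data type suffix
// (which is accepted but ignored), assembles as "vand q0, q0, q1".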
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
(ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
(ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
(ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
(ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
(ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
(ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
(ins VecListOneDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
(ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
(ins VecListOneDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
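// Usage sketch: "vld1.8 {d0[3]}, [r0]!" first matches
// VLD1LNdWB_fixed_Asm_8; the asm parser then rewrites the pseudo to the
// real VLD1LN instruction once the lane index is known. The VST1/VLD2/
// VST2/VLD3 lane and all-lanes pseudos below follow the same scheme.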
// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
(ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
(ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
(ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
(ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
(ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
(ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
(ins VecListOneDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
(ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
(ins VecListOneDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
(ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
(ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
(ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
(ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
(ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
(ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
(ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
(ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
(ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
(ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
(ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
(ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
(ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
(ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
(ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
(ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
(ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
(ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
(ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
(ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
(ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
(ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
(ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
(ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
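// For example: vst3.16 {d0[2], d1[2], d2[2]}, [r0]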
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
(ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
(ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
(ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
(ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
(ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
(ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
(ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
(ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
(ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
(ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
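// For example: vst3.32 {d0, d1, d2}, [r0]!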
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
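// For example: vld4.8 {d0[], d1[], d2[], d3[]}, [r0]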
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourDAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourDAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourDAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourQAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourQAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourQAllLanes:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
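// For example: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r0]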
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
(ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
(ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
(ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
(ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
(ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourQWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
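// For example: vld4.16 {d0, d1, d2, d3}, [r0], r1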
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
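// For example: vst4.8 {d0[3], d1[3], d2[3], d3[3]}, [r0]!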
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
(ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
(ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
(ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
(ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
(ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
(ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
(ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
(ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
(ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
(ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourDByteIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourDWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourQWordIndexed:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
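// For example: vst4.8 {d16, d17, d18, d19}, [r0]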
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
(ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
(ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourD:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
// VMOV/VMVN take an optional datatype suffix.
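// For example, "vmov d0, d1" and "vmov.i32 d0, d1" both assemble as a
// VORR of the source register with itself.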
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
(VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
(VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
(VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
(VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
(VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
(VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
(VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
(VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
(VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
(VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
(VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
(VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
(VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
(VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
(VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
(VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
(VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
(VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
(VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
(VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
(VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
(VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
(VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
(VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
(VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
(VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
(VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
(VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
(VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
(VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VSWP allows, but does not require, a type suffix.
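// For example, "vswp d0, d1" and "vswp.8 d0, d1" are both accepted.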
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
(VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
(VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
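// For example, "vbsl d0, d1, d2" and "vbsl.8 d0, d1, d2" are equivalent.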
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
(VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
(VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
(VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
(VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// "vmov Rd, #-imm" can be handled via "vmvn".
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
(VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
(VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
(VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
(VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should be restricted to just the Q register variants, but the
// register classes are enough to match correctly regardless, so we keep it
// simple and just use MnemonicAlias.
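// For example, "vaddq.i32 q0, q1, q2" is accepted as "vadd.i32 q0, q1, q2".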
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;
def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;
def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;
def : NEONMnemonicAlias<"vmulq", "vmul">;
def : NEONMnemonicAlias<"vabsq", "vabs">;
def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;
def : NEONMnemonicAlias<"vcvtq", "vcvt">;
def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;
def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;
def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
// Alias for loading floating-point immediates that aren't representable
// using the vmov.f32 encoding, but whose bit pattern is representable
// using the .i32 encoding.
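// For example, #0.0 has no vmov.f32 encoding, but its bit pattern
// (0x00000000) is a valid .i32 immediate, so it can be emitted as
// "vmov.i32 d0, #0".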
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
Index: head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp (revision 265925)
@@ -1,69 +1,95 @@
//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the MCAsmInfoDarwin properties.
//
//===----------------------------------------------------------------------===//
#include "PPCMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
+/// This version of the constructor is here to maintain ABI compatibility with
+/// LLVM 3.4.0.
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = false;
CommentString = ";";
ExceptionsType = ExceptionHandling::DwarfCFI;
if (!is64Bit)
Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
AssemblerDialect = 1; // New-Style mnemonics.
SupportsDebugInformation = true; // Debug information.
+}
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = false;
+
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+ SupportsDebugInformation = true; // Debug information.
+
+ // The old assembler lacks some directives.
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than the OS version.
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
}
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = false;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
CommentString = "#";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
// Uses '.section' before '.bss' directive
UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
DollarIsPC = true;
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
MinInstAlignment = 4;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
AssemblerDialect = 1; // New-Style mnemonics.
}
Index: head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h (revision 265925)
@@ -1,36 +1,40 @@
//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCAsmInfoDarwin class.
//
//===----------------------------------------------------------------------===//
#ifndef PPCTARGETASMINFO_H
#define PPCTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+class Triple;
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
virtual void anchor();
public:
+ /// This version of the constructor is here to maintain ABI compatibility
+ /// with LLVM 3.4.0.
explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit PPCLinuxMCAsmInfo(bool is64Bit);
};
} // namespace llvm
#endif
Index: head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp (revision 265925)
@@ -1,226 +1,226 @@
//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides PowerPC specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PPCGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "PPCGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "PPCGenRegisterInfo.inc"
using namespace llvm;
// Pin the vtable to this file.
PPCTargetStreamer::~PPCTargetStreamer() {}
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
return X;
}
static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
Triple TheTriple(TT);
bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
unsigned Flavour = isPPC64 ? 0 : 1;
unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
MCRegisterInfo *X = new MCRegisterInfo();
InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
return X;
}
static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitPPCMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
Triple TheTriple(TT);
bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
- MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
MAI = new PPCLinuxMCAsmInfo(isPPC64);
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
MCCFIInstruction Inst =
MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
Triple T(TT);
if (T.isOSDarwin())
RM = Reloc::DynamicNoPIC;
else
RM = Reloc::Static;
}
if (CM == CodeModel::Default) {
Triple T(TT);
if (!T.isOSDarwin() &&
(T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
CM = CodeModel::Medium;
}
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
namespace {
class PPCTargetAsmStreamer : public PPCTargetStreamer {
formatted_raw_ostream &OS;
public:
PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {}
virtual void emitTCEntry(const MCSymbol &S) {
OS << "\t.tc ";
OS << S.getName();
OS << "[TC],";
OS << S.getName();
OS << '\n';
}
};
class PPCTargetELFStreamer : public PPCTargetStreamer {
virtual void emitTCEntry(const MCSymbol &S) {
// Creates a R_PPC64_TOC relocation
Streamer->EmitSymbolValue(&S, 8);
}
};
}
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
if (Triple(TT).isOSDarwin())
return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
PPCTargetStreamer *S = new PPCTargetELFStreamer();
return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
}
static MCStreamer *
createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc, bool useCFI,
bool useDwarfDirectory, MCInstPrinter *InstPrint,
MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS);
return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
useDwarfDirectory, InstPrint, CE, TAB,
ShowInst);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
}
extern "C" void LLVMInitializePowerPCTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
createPPCMCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
createPPCMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
createPPCMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
createPPCMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
createPPCMCSubtargetInfo);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
createPPCMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
// Register the object streamer.
TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
// Register the asm streamer.
TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
createPPCMCInstPrinter);
}
Index: head/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp (revision 265925)
@@ -1,1150 +1,1143 @@
//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to PowerPC assembly language. This printer is
// the output mechanism used by `llc'.
//
// Documentation at http://developer.apple.com/documentation/DeveloperTools/
// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget &Subtarget;
uint64_t TOCLabelID;
public:
explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
virtual const char *getPassName() const {
return "PowerPC Assembly Printer";
}
MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
virtual void EmitInstruction(const MachineInstr *MI);
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
class PPCLinuxAsmPrinter : public PPCAsmPrinter {
public:
explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: PPCAsmPrinter(TM, Streamer) {}
virtual const char *getPassName() const {
return "Linux PPC Assembly Printer";
}
bool doFinalization(Module &M);
virtual void EmitFunctionEntryLabel();
void EmitFunctionBodyEnd();
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
/// OS X
class PPCDarwinAsmPrinter : public PPCAsmPrinter {
public:
explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: PPCAsmPrinter(TM, Streamer) {}
virtual const char *getPassName() const {
return "Darwin PPC Assembly Printer";
}
bool doFinalization(Module &M);
void EmitStartOfAsmFile(Module &M);
void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
};
} // end of anonymous namespace
/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left. Used for Linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
case 'v': return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
return RegName;
}
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
case MachineOperand::MO_Register: {
const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
// The Linux assembler (others?) does not accept register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
O << RegName;
return;
}
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
return;
case MachineOperand::MO_JumpTableIndex:
O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << MO.getIndex();
// FIXME: PIC relocation model
return;
case MachineOperand::MO_ConstantPoolIndex:
O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
case MachineOperand::MO_BlockAddress:
O << *GetBlockAddressSymbol(MO.getBlockAddress());
return;
case MachineOperand::MO_ExternalSymbol: {
// Computing the address of an external symbol, not calling it.
if (TM.getRelocationModel() == Reloc::Static) {
O << *GetExternalSymbolSymbol(MO.getSymbolName());
return;
}
MCSymbol *NLPSym =
OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
MO.getSymbolName()+"$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
O << *NLPSym;
return;
}
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
const GlobalValue *GV = MO.getGlobal();
MCSymbol *SymToPrint;
// External or weakly linked global variables need non-lazily-resolved stubs
if (TM.getRelocationModel() != Reloc::Static &&
(GV->isDeclaration() || GV->isWeakForLinker())) {
if (!GV->hasHiddenVisibility()) {
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>()
.getGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().
getHiddenGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else {
SymToPrint = getSymbol(GV);
}
} else {
SymToPrint = getSymbol(GV);
}
O << *SymToPrint;
printOffset(MO.getOffset(), O);
return;
}
default:
O << "<unknown operand type: " << MO.getType() << ">";
return;
}
}
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'c': // Don't print "$" before a global var name or constant.
break; // PPC never has a prefix.
case 'L': // Write second word of DImode reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNo).isReg() ||
OpNo+1 == MI->getNumOperands() ||
!MI->getOperand(OpNo+1).isReg())
return true;
++OpNo; // Return the high-part.
break;
case 'I':
// Write 'i' if an integer constant, otherwise nothing. Used to print
// addi vs add, etc.
if (MI->getOperand(OpNo).isImm())
O << "i";
return false;
}
}
printOperand(MI, OpNo, O);
return false;
}
// At the moment, all inline asm memory operands are a single register.
// In any case, the output of this routine should always be just one
// assembler operand.
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
case 'y': // A memory reference for an X-form instruction
{
const char *RegName = "r0";
if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
}
}
}
assert(MI->getOperand(OpNo).isReg());
O << "0(";
printOperand(MI, OpNo, O);
O << ")";
return false;
}
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
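/// (Entry labels are formed from the private-label prefix, "C", and a
/// running ID, e.g. ".LC<n>" with the ELF ".L" prefix.)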
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid a name clash, check whether the name already exists.
while (TOCEntry == 0) {
if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
"C" + Twine(TOCLabelID++)) == 0) {
TOCEntry = GetTempSymbol("C", TOCLabelID);
}
}
return TOCEntry;
}
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
// Into this, where the label is the PIC base:
// bl L1$pb
// L1$pb:
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
return;
}
case PPC::LDtocJTI:
case PPC::LDtocCPT:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to LD, and the global address operand to be a
// reference to the TOC entry we will synthesize later.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
// Map symbol -> label of TOC entry
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
MCSymbol *MOSymbol = 0;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::ADDIStocHA: {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to ADDIS8. If the global address is external,
// has common linkage, is a function address, or is a jump table
// address, then generate a TOC entry and reference that. Otherwise
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = 0;
bool IsExternal = false;
bool IsFunction = false;
bool IsCommon = false;
bool IsAvailExt = false;
if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsCommon = GVar && RealGValue->hasCommonLinkage();
IsFunction = !GVar;
IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::LDtocL: {
// Transform %Xd = LDtocL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to LD. If the global address is external, has
// common linkage, or is a jump table address, then reference the
// associated TOC entry. Otherwise reference the symbol directly.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
"Invalid operand for LDtocL!");
MCSymbol *MOSymbol = 0;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
if (TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
RealGValue->hasAvailableExternallyLinkage() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::ADDItocL: {
// Transform %Xd = ADDItocL %Xs, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise reference the
// symbol directly.
TmpInst.setOpcode(PPC::ADDI8);
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = 0;
bool IsExternal = false;
bool IsFunction = false;
if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsFunction = !GVar;
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::ADDISgotTprelHA: {
// Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tprel@ha
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTprel));
return;
}
case PPC::LDgotTprelL: {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to LD.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::ADDIStlsgdHA: {
// Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsGD));
return;
}
case PPC::ADDItlsgdL: {
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
return;
}
case PPC::GETtlsADDR: {
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
// Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
const MCSymbolRefExpr *TlsRef =
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
}
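// Taken together, ADDIStlsgdHA, ADDItlsgdL and GETtlsADDR emit the usual
// PPC64 ELF general-dynamic TLS sequence, roughly (r3 illustrative):
//   addis 3, 2, x@got@tlsgd@ha
//   addi  3, 3, x@got@tlsgd@l
//   bl    __tls_get_addr(x@tlsgd)
//   nop
// where BL8_NOP_TLS accounts for the trailing nop the linker may patch.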
case PPC::ADDIStlsldHA: {
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsLD));
return;
}
case PPC::ADDItlsldL: {
// Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsLD));
return;
}
case PPC::GETtlsldADDR: {
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
// Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
const MCSymbolRefExpr *TlsRef =
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
}
case PPC::ADDISdtprelHA: {
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X3)
.addExpr(SymDtprel));
return;
}
case PPC::ADDIdtprelL: {
// Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@dtprel@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
return;
}
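// Similarly, the tlsld/dtprel cases above combine into the local-dynamic
// sequence, roughly:
//   addis 3, 2, x@got@tlsld@ha
//   addi  3, 3, x@got@tlsld@l
//   bl    __tls_get_addr(x@tlsld)
//   nop
//   addis 3, 3, x@dtprel@ha
//   addi  3, 3, x@dtprel@l
// with the final pair adding the variable's DTP-relative offset to the
// module base returned in r3.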
case PPC::MFOCRF:
case PPC::MFOCRF8:
if (!Subtarget.hasMFOCRF()) {
// Transform: %R3 = MFOCRF %CR7
// Into: %R3 = MFCR ;; cr7
unsigned NewOpcode =
MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
.addReg(MI->getOperand(0).getReg()));
return;
}
break;
case PPC::MTOCRF:
case PPC::MTOCRF8:
if (!Subtarget.hasMFOCRF()) {
// Transform: %CR7 = MTOCRF %R3
// Into: MTCRF mask, %R3 ;; cr7
unsigned NewOpcode =
MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
->getEncodingValue(MI->getOperand(0).getReg());
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
.addImm(Mask)
.addReg(MI->getOperand(1).getReg()));
return;
}
break;
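// For example, lowering "%CR7 = MTOCRF %R3" on a subtarget without
// mfocrf support: CR7 has encoding value 7, so Mask = 0x80 >> 7 = 0x01,
// i.e. the FXM field bit that selects only CR7 in the emitted mtcrf.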
- case PPC::SYNC:
- // In Book E sync is called msync, handle this special case here...
- if (Subtarget.isBookE()) {
- OutStreamer.EmitRawText(StringRef("\tmsync"));
- return;
- }
- break;
case PPC::LD:
case PPC::STD:
case PPC::LWA_32:
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
// FIXME: This test is currently disabled for Darwin. The test
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
if (!Subtarget.isDarwin()) {
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
}
// Now process the instruction normally.
break;
}
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
OutStreamer.EmitInstruction(TmpInst);
}
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer.getCurrentSection();
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(Section);
OutStreamer.EmitLabel(CurrentFnSym);
OutStreamer.EmitValueToAlignment(8);
MCSymbol *Symbol1 =
OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
8/*size*/);
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current.first, Current.second);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
".L." + Twine(CurrentFnSym->getName()));
OutStreamer.EmitLabel(RealFnSym);
CurrentFnSymForSize = RealFnSym;
}
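// For a function "foo" (name illustrative) the emitted ELFv1 descriptor
// therefore looks like:
//   .section .opd
//   foo:
//     .quad .L.foo        # entry point (R_PPC64_ADDR64)
//     .quad .TOC.@tocbase # TOC base (R_PPC64_TOC)
//     .quad 0             # environment pointer
// while the code itself is emitted under the local label .L.foo.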
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
const DataLayout *TD = TM.getDataLayout();
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer());
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(Section);
for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
TS.emitTCEntry(*S);
}
}
MachineModuleInfoELF &MMIELF =
MMI->getObjFileInfo<MachineModuleInfoELF>();
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer.EmitLabel(Stubs[i].first);
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
OutContext),
isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
}
return AsmPrinter::doFinalization(M);
}
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///
void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
// Only the 64-bit target requires a traceback table. For now,
// we only emit the word of zeroes that GDB requires to find
// the end of the function, and zeroes for the eight-byte
// mandatory fields.
// FIXME: We should fill in the eight-byte mandatory fields as described in
// the PPC64 ELF ABI (this is a low-priority item because GDB does not
// currently make use of these fields).
if (Subtarget.isPPC64()) {
OutStreamer.EmitIntValue(0, 4/*size*/);
OutStreamer.EmitIntValue(0, 8/*size*/);
}
}
void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
"ppc",
"ppc440",
"ppc601",
"ppc602",
"ppc603",
"ppc7400",
"ppc750",
"ppc970",
"ppcA2",
"ppce500mc",
"ppce5500",
"power3",
"power4",
"power5",
"power5x",
"power6",
"power6x",
"power7",
"ppc64",
"ppc64le"
};
unsigned Directive = Subtarget.getDarwinDirective();
if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
// FIXME: This is a total hack, finish mc'izing the PPC backend.
if (OutStreamer.hasRawTextSupport()) {
assert(Directive < array_lengthof(CPUDirectives) &&
"CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
}
// Prime text sections so they are adjacent. This reduces the likelihood
// that a large data or debug section causes a branch to exceed the 16M
// limit.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
OutStreamer.SwitchSection(
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
OutStreamer.SwitchSection(
OutContext.getMachOSection("__TEXT","__symbol_stub1",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText()));
}
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
}
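// E.g. for Sym == "_foo$stub" (name illustrative) these helpers yield
// "_foo$lazy_ptr" and "_foo$stub$tmp" respectively.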
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
bool isDarwin = Subtarget.isDarwin();
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
// .lazy_symbol_pointer
const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
// Output stubs for dynamically-linked functions
if (TM.getRelocationModel() == Reloc::PIC_) {
const MCSection *StubSection =
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
const MCExpr *Sub =
MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
// bcl 20, 31, AnonSymbol
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
OutStreamer.EmitLabel(AnonSymbol);
// mflr r11
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
.addExpr(SubHa16));
// mtlr r0
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(SubLo16).addExpr(SubLo16)
.addReg(PPC::R11));
// mtctr r12
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
MCSymbol *DyldStubBindingHelper =
OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
if (isPPC64) {
// .quad dyld_stub_binding_helper
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
} else {
// .long dyld_stub_binding_helper
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
OutStreamer.AddBlankLine();
return;
}
const MCSection *StubSection =
OutContext.getMachOSection("__TEXT","__symbol_stub1",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
.addReg(PPC::R11)
.addExpr(LazyPtrHa16));
// ldu r12, lo16(LazyPtr)(r11)
// lwzu r12, lo16(LazyPtr)(r11)
const MCExpr *LazyPtrLo16 =
PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
.addReg(PPC::R11));
// mtctr r12
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
MCSymbol *DyldStubBindingHelper =
OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
if (isPPC64) {
// .quad dyld_stub_binding_helper
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
} else {
// .long dyld_stub_binding_helper
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
OutStreamer.AddBlankLine();
}
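// In assembly terms, the non-PIC path above emits one stub of roughly this
// shape per entry ("_foo" illustrative; 32-bit flavour shown, the 64-bit
// one uses ldu and an 8-byte pointer):
//   _foo$stub:
//           lis   r11, ha16(_foo$lazy_ptr)
//           lwzu  r12, lo16(_foo$lazy_ptr)(r11)
//           mtctr r12
//           bctr
//   .lazy_symbol_pointer
//   _foo$lazy_ptr:
//           .indirect_symbol _foo
//           .long dyld_stub_binding_helper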
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty())
EmitFunctionStubs(Stubs);
if (MAI->doesSupportExceptionHandling() && MMI) {
// Add the (possibly multiple) personalities to the set of global values.
// Only referenced functions get into the Personalities list.
const std::vector<const Function*> &Personalities = MMI->getPersonalities();
for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
E = Personalities.end(); I != E; ++I) {
if (*I) {
MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMIMacho.getGVStubEntry(NLPSym);
StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
}
}
}
// Output stubs for dynamically-linked functions.
Stubs = MMIMacho.GetGVStubList();
// Output macho stubs for external and common global variables.
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(isPPC64 ? 3 : 2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer.EmitLabel(Stubs[i].first);
// .indirect_symbol _foo
MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
if (MCSym.getInt())
// External to current translation unit.
OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
// When we place the LSDA into the TEXT section, the type info pointers
// need to be indirect and pc-rel. We accomplish this by using NLPs.
// However, sometimes the types are local to the file. So we need to
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
EmitAlignment(isPPC64 ? 3 : 2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer.EmitLabel(Stubs[i].first);
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
}
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never generates
// code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
return AsmPrinter::doFinalization(M);
}
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
/// for a MachineFunction to the given output stream, in a format that the
/// Darwin assembler can deal with.
///
static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
MCStreamer &Streamer) {
const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
if (Subtarget->isDarwin())
return new PPCDarwinAsmPrinter(tm, Streamer);
return new PPCLinuxAsmPrinter(tm, Streamer);
}
// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
}
Index: head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 265925)
@@ -1,649 +1,654 @@
//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
//
// Criteria for CTR loops:
// - Countable loops (w/ ind. var for a trip count)
// - Try inner-most loops first
// - No nested CTR loops.
// - No function calls in loops.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ctrloops"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "PPCTargetMachine.h"
#include "PPC.h"
#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#endif
#include <algorithm>
#include <vector>
using namespace llvm;
#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
namespace llvm {
void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}
namespace {
struct PPCCTRLoops : public FunctionPass {
#ifndef NDEBUG
static int Counter;
#endif
public:
static char ID;
PPCCTRLoops() : FunctionPass(ID), TM(0) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
}
private:
bool mightUseCTR(const Triple &TT, BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
private:
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
DataLayout *TD;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
};
char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
int PPCCTRLoops::Counter = 0;
#endif
#ifndef NDEBUG
struct PPCCTRLoopsVerify : public MachineFunctionPass {
public:
static char ID;
PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
virtual bool runOnMachineFunction(MachineFunction &MF);
private:
MachineDominatorTree *MDT;
};
char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
return new PPCCTRLoops(TM);
}
#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
"PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
"PowerPC CTR Loops Verify", false, false)
FunctionPass *llvm::createPPCCTRLoopsVerify() {
return new PPCCTRLoopsVerify();
}
#endif // NDEBUG
bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<DataLayout>();
LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
bool MadeChange = false;
for (LoopInfo::iterator I = LI->begin(), E = LI->end();
I != E; ++I) {
Loop *L = *I;
if (!L->getParentLoop())
MadeChange |= convertToCTRLoop(L);
}
return MadeChange;
}
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32 : 64);
+
+ return false;
+}
+
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
if (CallInst *CI = dyn_cast<CallInst>(J)) {
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
// Inline ASM is okay, unless it clobbers the ctr register.
InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
InlineAsm::ConstraintInfo &C = CIV[i];
if (C.Type != InlineAsm::isInput)
for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
return true;
}
continue;
}
if (!TM)
return true;
const TargetLowering *TLI = TM->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
// sin, cos, exp and log are always calls.
unsigned Opcode;
if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
switch (F->getIntrinsicID()) {
default: continue;
// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
!defined(setjmp_undefined_for_msvc)
# pragma push_macro("setjmp")
# undef setjmp
# define setjmp_undefined_for_msvc
#endif
case Intrinsic::setjmp:
#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
// let's return it to _setjmp state
# pragma pop_macro("setjmp")
# undef setjmp_undefined_for_msvc
#endif
case Intrinsic::longjmp:
// Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
// because, although it does clobber the counter register, control can't
// then return to inside the loop unless there is also
// an eh_sjlj_setjmp.
case Intrinsic::eh_sjlj_setjmp:
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::powi:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::pow:
case Intrinsic::sin:
case Intrinsic::cos:
return true;
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
}
}
// PowerPC does not use [US]DIVREM or other library calls for
// operations on regular types which are not otherwise library calls
// (i.e. soft float or atomics). If adapting for targets that do,
// additional care is required here.
LibFunc::Func Func;
if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
LibInfo->getLibFunc(F->getName(), Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
// Non-read-only functions are never treated as intrinsics.
if (!CI->onlyReadsMemory())
return true;
// Conversion happens only for FP calls.
if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
return true;
switch (Func) {
default: return true;
case LibFunc::copysign:
case LibFunc::copysignf:
continue; // ISD::FCOPYSIGN is never a library call.
case LibFunc::copysignl:
return true;
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
continue; // ISD::FABS is never a library call.
case LibFunc::sqrt:
case LibFunc::sqrtf:
case LibFunc::sqrtl:
Opcode = ISD::FSQRT; break;
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
Opcode = ISD::FFLOOR; break;
case LibFunc::nearbyint:
case LibFunc::nearbyintf:
case LibFunc::nearbyintl:
Opcode = ISD::FNEARBYINT; break;
case LibFunc::ceil:
case LibFunc::ceilf:
case LibFunc::ceill:
Opcode = ISD::FCEIL; break;
case LibFunc::rint:
case LibFunc::rintf:
case LibFunc::rintl:
Opcode = ISD::FRINT; break;
case LibFunc::round:
case LibFunc::roundf:
case LibFunc::roundl:
Opcode = ISD::FROUND; break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
Opcode = ISD::FTRUNC; break;
}
MVT VTy =
TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
if (VTy == MVT::Other)
return true;
if (TLI->isOperationLegalOrCustom(Opcode, VTy))
continue;
else if (VTy.isVector() &&
TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
continue;
return true;
}
}
return true;
} else if (isa<BinaryOperator>(J) &&
J->getType()->getScalarType()->isPPC_FP128Ty()) {
// Most operations on ppc_f128 values become calls.
return true;
} else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
return true;
- } else if (TT.isArch32Bit() &&
- J->getType()->getScalarType()->isIntegerTy(64) &&
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
J->getOpcode() == Instruction::SDiv ||
J->getOpcode() == Instruction::URem ||
J->getOpcode() == Instruction::SRem)) {
return true;
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
// On PowerPC, indirect jumps use the counter register.
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
const TargetLowering *TLI = TM->getTargetLowering();
if (TLI->supportJumpTables() &&
SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
return true;
}
}
return false;
}
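// For example, a loop body containing
//   %s = call double @llvm.sin.f64(double %x)
// defeats CTR conversion (sin is listed above as always becoming a real
// call), while
//   %a = call double @llvm.fabs.f64(double %x)
// is harmless: it hits the intrinsic switch's default case and continues,
// since ISD::FABS never becomes a library call.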
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
Triple TT = Triple(L->getHeader()->getParent()->getParent()->
getTargetTriple());
if (!TT.isArch32Bit() && !TT.isArch64Bit())
return MadeChange; // Unknown arch. type.
// Process nested loops first.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
MadeChange |= convertToCTRLoop(*I);
}
// If a nested loop has been converted, then we can't convert this loop.
if (MadeChange)
return MadeChange;
#ifndef NDEBUG
// Stop trying after reaching the limit (if any).
int Limit = CTRLoopLimit;
if (Limit >= 0) {
if (Counter >= CTRLoopLimit)
return false;
Counter++;
}
#endif
// We don't want to spill/restore the counter register, and so we don't
// want to use the counter register if the loop contains calls.
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I)
if (mightUseCTR(TT, *I))
return MadeChange;
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
BasicBlock *CountedExitBlock = 0;
const SCEV *ExitCount = 0;
BranchInst *CountedExitBranch = 0;
for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
IE = ExitingBlocks.end(); I != IE; ++I) {
const SCEV *EC = SE->getExitCount(L, *I);
DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
(*I)->getName() << ": " << *EC << "\n");
if (isa<SCEVCouldNotCompute>(EC))
continue;
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
if (ConstEC->getValue()->isZero())
continue;
} else if (!SE->isLoopInvariant(EC, L))
continue;
if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
continue;
// We now have a loop-invariant count of loop iterations (which is not the
// constant zero) for which we know that this loop will not exit via this
// exiting block.
// We need to make sure that this block will run on every loop iteration.
// For this to be true, we must dominate all blocks with backedges. Such
// blocks are in-loop predecessors to the header block.
bool NotAlways = false;
for (pred_iterator PI = pred_begin(L->getHeader()),
PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
if (!L->contains(*PI))
continue;
if (!DT->dominates(*I, *PI)) {
NotAlways = true;
break;
}
}
if (NotAlways)
continue;
// Make sure this block ends with a conditional branch.
Instruction *TI = (*I)->getTerminator();
if (!TI)
continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional())
continue;
CountedExitBranch = BI;
} else
continue;
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
CountedExitBlock = *I;
ExitCount = EC;
break;
}
if (!CountedExitBlock)
return MadeChange;
BasicBlock *Preheader = L->getLoopPreheader();
// If we don't have a preheader, then insert one. If we already have a
// preheader, then we can use it (except if the preheader contains a use of
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
if (!Preheader || mightUseCTR(TT, Preheader))
Preheader = InsertPreheaderForLoop(L, this);
if (!Preheader)
return MadeChange;
DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
// Insert the count into the preheader and replace the condition used by the
// selected branch.
MadeChange = true;
SCEVExpander SCEVE(*SE, "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(CountType, 1));
Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
Preheader->getTerminator());
IRBuilder<> CountBuilder(Preheader->getTerminator());
Module *M = Preheader->getParent()->getParent();
Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
CountType);
CountBuilder.CreateCall(MTCTRFunc, ECValue);
IRBuilder<> CondBuilder(CountedExitBranch);
Value *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
Value *NewCond = CondBuilder.CreateCall(DecFunc);
Value *OldCond = CountedExitBranch->getCondition();
CountedExitBranch->setCondition(NewCond);
// The false branch must exit the loop.
if (!L->contains(CountedExitBranch->getSuccessor(0)))
CountedExitBranch->swapSuccessors();
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
DeleteDeadPHIs(CountedExitBlock);
++NumCTRLoops;
return MadeChange;
}
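// Schematically, a converted loop ends up as (IR names illustrative):
//   preheader:
//     call void @llvm.ppc.mtctr.i64(i64 %count)   ; exit count + 1
//     br label %body
//   ...
//   exiting:
//     %dec = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
//     br i1 %dec, label %body, label %exit
// which later instruction selection collapses into a single bdnz.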
#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
return true;
} else if (MO.isRegMask()) {
if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
return true;
}
}
return false;
}
static bool verifyCTRBranch(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator BI = I;
SmallSet<MachineBasicBlock *, 16> Visited;
SmallVector<MachineBasicBlock *, 8> Preds;
bool CheckPreds;
if (I == MBB->begin()) {
Visited.insert(MBB);
goto queue_preds;
} else
--I;
check_block:
Visited.insert(MBB);
if (I == MBB->end())
goto queue_preds;
CheckPreds = true;
for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
unsigned Opc = I->getOpcode();
if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
CheckPreds = false;
break;
}
if (I != BI && clobbersCTR(I)) {
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
MBB->getFullName() << ") instruction " << *I <<
" clobbers CTR, invalidating " << "BB#" <<
BI->getParent()->getNumber() << " (" <<
BI->getParent()->getFullName() << ") instruction " <<
*BI << "\n");
return false;
}
if (I == IE)
break;
}
if (!CheckPreds && Preds.empty())
return true;
if (CheckPreds) {
queue_preds:
if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
BI->getParent()->getNumber() << " (" <<
BI->getParent()->getFullName() << ") instruction " <<
*BI << "\n");
return false;
}
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PIE = MBB->pred_end(); PI != PIE; ++PI)
Preds.push_back(*PI);
}
do {
MBB = Preds.pop_back_val();
if (!Visited.count(MBB)) {
I = MBB->getLastNonDebugInstr();
goto check_block;
}
} while (!Preds.empty());
return true;
}
bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
MDT = &getAnalysis<MachineDominatorTree>();
// Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
// any other instructions that might clobber the ctr register.
for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
I != IE; ++I) {
MachineBasicBlock *MBB = I;
if (!MDT->isReachableFromEntry(MBB))
continue;
for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
MIIE = MBB->end(); MII != MIIE; ++MII) {
unsigned Opc = MII->getOpcode();
if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
Opc == PPC::BDZ8 || Opc == PPC::BDZ)
if (!verifyCTRBranch(MBB, MII))
llvm_unreachable("Invalid PPC CTR loop!");
}
}
return false;
}
#endif // NDEBUG
Index: head/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp (revision 265925)
@@ -1,2236 +1,2238 @@
//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ppcfastisel"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
//===----------------------------------------------------------------------===//
//
// TBD:
// FastLowerArguments: Handle simple cases.
// PPCMaterializeGV: Handle TLS.
// SelectCall: Handle function pointers.
// SelectCall: Handle multi-register return values.
// SelectCall: Optimize away nops for local calls.
// processCallArgs: Handle bit-converted arguments.
// finishCall: Handle multi-register return values.
// PPCComputeAddress: Handle parameter references as FrameIndex's.
// PPCEmitCmp: Handle immediate as operand 1.
// SelectCall: Handle small byval arguments.
// SelectIntrinsicCall: Implement.
// SelectSelect: Implement.
// Consider factoring isTypeLegal into the base class.
// Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
using namespace llvm;
namespace {
typedef struct Address {
enum {
RegBase,
FrameIndexBase
} BaseType;
union {
unsigned Reg;
int FI;
} Base;
long Offset;
// Innocuous defaults for our address.
Address()
: BaseType(RegBase), Offset(0) {
Base.Reg = 0;
}
} Address;
class PPCFastISel : public FastISel {
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
const PPCSubtarget &PPCSubTarget;
LLVMContext *Context;
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo),
TM(FuncInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
PPCSubTarget(
*((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl())
),
Context(&FuncInfo.Fn->getContext()) { }
// Backend specific FastISel code.
private:
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
virtual bool FastLowerArguments();
virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
// Instruction selection routines.
private:
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
bool SelectCmp(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
bool SelectIToFP(const Instruction *I, bool IsSigned);
bool SelectFPToI(const Instruction *I, bool IsSigned);
bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
bool SelectCall(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg);
bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
const TargetRegisterClass *RC, bool IsZExt = true,
unsigned FP64LoadOpc = PPC::LFD);
bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
bool PPCComputeAddress(const Value *Obj, Address &Addr);
void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
unsigned &IndexReg);
bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
unsigned PPCMaterializeInt(const Constant *C, MVT VT);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
unsigned SrcReg, bool IsSigned);
unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
// Call handling routines.
private:
bool processCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg);
void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg);
CCAssignFn *usePPC32CCs(unsigned Flag);
private:
#include "PPCGenFastISel.inc"
};
} // end anonymous namespace
#include "PPCGenCallingConv.inc"
// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
if (Flag == 1)
return CC_PPC32_SVR4;
else if (Flag == 2)
return CC_PPC32_SVR4_ByVal;
else if (Flag == 3)
return CC_PPC32_SVR4_VarArg;
else
return RetCC_PPC;
}
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
switch (Pred) {
// These are not representable with any single compare.
case CmpInst::FCMP_FALSE:
case CmpInst::FCMP_UEQ:
case CmpInst::FCMP_UGT:
case CmpInst::FCMP_UGE:
case CmpInst::FCMP_ULT:
case CmpInst::FCMP_ULE:
case CmpInst::FCMP_UNE:
case CmpInst::FCMP_TRUE:
default:
return Optional<PPC::Predicate>();
case CmpInst::FCMP_OEQ:
case CmpInst::ICMP_EQ:
return PPC::PRED_EQ;
case CmpInst::FCMP_OGT:
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_SGT:
return PPC::PRED_GT;
case CmpInst::FCMP_OGE:
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_SGE:
return PPC::PRED_GE;
case CmpInst::FCMP_OLT:
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_SLT:
return PPC::PRED_LT;
case CmpInst::FCMP_OLE:
case CmpInst::ICMP_ULE:
case CmpInst::ICMP_SLE:
return PPC::PRED_LE;
case CmpInst::FCMP_ONE:
case CmpInst::ICMP_NE:
return PPC::PRED_NE;
case CmpInst::FCMP_ORD:
return PPC::PRED_NU;
case CmpInst::FCMP_UNO:
return PPC::PRED_UN;
}
}
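// E.g. getComparePred(CmpInst::FCMP_OEQ) yields PPC::PRED_EQ, while an
// unordered predicate such as FCMP_UEQ comes back empty and the caller
// falls back to the default instruction selector.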
// Determine whether the type Ty is simple enough to be handled by
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT Evt = TLI.getValueType(Ty, true);
// Only handle simple types.
if (Evt == MVT::Other || !Evt.isSimple()) return false;
VT = Evt.getSimpleVT();
// Handle all legal types, i.e. a register that will directly hold this
// value.
return TLI.isTypeLegal(VT);
}
// Determine whether the type Ty is simple enough to be handled by
// fast-isel as a load target, and return its equivalent machine type in VT.
bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT)) return true;
// If this is a type that can be sign or zero-extended to a basic operation
// go ahead and accept it now.
if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
return true;
}
return false;
}
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default:
break;
case Instruction::BitCast:
// Look through bitcasts.
return PPCComputeAddress(U->getOperand(0), Addr);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
long TmpOffset = Addr.Offset;
// Iterate through the GEP folding the constants into offsets where
// we can.
gep_type_iterator GTI = gep_type_begin(U);
for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
II != IE; ++II, ++GTI) {
const Value *Op = *II;
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Try to grab the base operand now.
Addr.Offset = TmpOffset;
if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
const AllocaInst *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
Addr.BaseType = Address::FrameIndexBase;
Addr.Base.FI = SI->second;
return true;
}
break;
}
}
// FIXME: References to parameters fall through to the behavior
// below. They should be able to reference a frame index since
// they are stored to the stack, so we can get "ld rx, offset(r1)"
// instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
// just contain the parameter. Try to handle this with a FI.
// Try to get this in a register if nothing else has worked.
if (Addr.Base.Reg == 0)
Addr.Base.Reg = getRegForValue(Obj);
// Prevent assignment of base register to X0, which is inappropriate
// for loads and stores alike.
if (Addr.Base.Reg != 0)
MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
return Addr.Base.Reg != 0;
}
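// For example, for a GEP like
//   %p = getelementptr [10 x i32]* %buf, i64 0, i64 4
// the constant indices fold away, leaving Addr with Base.Reg set to the
// register holding %buf and Offset == 16 (4 elements * 4 bytes).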
// Fix up some addresses that can't be used directly. For example, if
// an offset won't fit in an instruction field, we may need to move it
// into an index register.
void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
unsigned &IndexReg) {
// Check whether the offset fits in the instruction field.
if (!isInt<16>(Addr.Offset))
UseOffset = false;
// If this is a stack pointer and the offset needs to be simplified then
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
Addr.Base.Reg = ResultReg;
Addr.BaseType = Address::RegBase;
}
if (!UseOffset) {
IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
: Type::getInt64Ty(*Context));
const ConstantInt *Offset =
ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
IndexReg = PPCMaterializeInt(Offset, MVT::i64);
assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
}
}
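// E.g. an access at Offset == 70000 fails isInt<16> (the D-field range is
// -32768..32767), so the offset is materialized into IndexReg and the
// caller switches to the corresponding X-form (reg+reg) opcode.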
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
const TargetRegisterClass *RC,
bool IsZExt, unsigned FP64LoadOpc) {
unsigned Opc;
bool UseOffset = true;
// If ResultReg is given, it determines the register class of the load.
// Otherwise, RC is the register class to use. If the result of the
// load isn't anticipated in this block, both may be zero, in which
// case we must make a conservative guess. In particular, don't assign
// R0 or X0 to the result register, as the result may be used in a load,
// store, add-immediate, or isel that won't permit this. (Though
// perhaps the spill and reload of live-exit values would handle this?)
const TargetRegisterClass *UseRC =
(ResultReg ? MRI.getRegClass(ResultReg) :
(RC ? RC :
(VT == MVT::f64 ? &PPC::F8RCRegClass :
(VT == MVT::f32 ? &PPC::F4RCRegClass :
(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass)))));
bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
switch (VT.SimpleTy) {
default: // e.g., vector types not handled
return false;
case MVT::i8:
Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
break;
case MVT::i16:
Opc = (IsZExt ?
(Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
(Is32BitInt ? PPC::LHA : PPC::LHA8));
break;
case MVT::i32:
Opc = (IsZExt ?
(Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
(Is32BitInt ? PPC::LWA_32 : PPC::LWA));
if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
UseOffset = false;
break;
case MVT::i64:
Opc = PPC::LD;
assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
"64-bit load with 32-bit target??");
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
Opc = PPC::LFS;
break;
case MVT::f64:
Opc = FP64LoadOpc;
break;
}
// If necessary, materialize the offset into a register and use
// the indexed form. Also handle stack pointers with special needs.
unsigned IndexReg = 0;
PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
if (ResultReg == 0)
ResultReg = createResultReg(UseRC);
// Note: If we still have a frame index here, we know the offset is
// in range, as otherwise PPCSimplifyAddress would have converted it
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
} else {
// Get the RR opcode corresponding to the RI one. FIXME: It would be
// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
// is hard to get at.
switch (Opc) {
default: llvm_unreachable("Unexpected opcode!");
case PPC::LBZ: Opc = PPC::LBZX; break;
case PPC::LBZ8: Opc = PPC::LBZX8; break;
case PPC::LHZ: Opc = PPC::LHZX; break;
case PPC::LHZ8: Opc = PPC::LHZX8; break;
case PPC::LHA: Opc = PPC::LHAX; break;
case PPC::LHA8: Opc = PPC::LHAX8; break;
case PPC::LWZ: Opc = PPC::LWZX; break;
case PPC::LWZ8: Opc = PPC::LWZX8; break;
case PPC::LWA: Opc = PPC::LWAX; break;
case PPC::LWA_32: Opc = PPC::LWAX_32; break;
case PPC::LD: Opc = PPC::LDX; break;
case PPC::LFS: Opc = PPC::LFSX; break;
case PPC::LFD: Opc = PPC::LFDX; break;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(Addr.Base.Reg).addReg(IndexReg);
}
return true;
}
// Attempt to fast-select a load instruction.
bool PPCFastISel::SelectLoad(const Instruction *I) {
// FIXME: No atomic loads are supported.
if (cast<LoadInst>(I)->isAtomic())
return false;
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getType(), VT))
return false;
// See if we can handle this address.
Address Addr;
if (!PPCComputeAddress(I->getOperand(0), Addr))
return false;
// Look at the currently assigned register for this instruction
// to determine the required register class. This is necessary
// to constrain RA from using R0/X0 when this is not legal.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
return false;
UpdateValueMap(I, ResultReg);
return true;
}
// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
assert(SrcReg && "Nothing to store!");
unsigned Opc;
bool UseOffset = true;
const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
switch (VT.SimpleTy) {
default: // e.g., vector types not handled
return false;
case MVT::i8:
Opc = Is32BitInt ? PPC::STB : PPC::STB8;
break;
case MVT::i16:
Opc = Is32BitInt ? PPC::STH : PPC::STH8;
break;
case MVT::i32:
assert(Is32BitInt && "Not GPRC for i32??");
Opc = PPC::STW;
break;
case MVT::i64:
Opc = PPC::STD;
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
Opc = PPC::STFS;
break;
case MVT::f64:
Opc = PPC::STFD;
break;
}
// If necessary, materialize the offset into a register and use
// the indexed form. Also handle stack pointers with special needs.
unsigned IndexReg = 0;
PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
// Note: If we still have a frame index here, we know the offset is
// in range, as otherwise PPCSimplifyAddress would have converted it
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
else {
// Get the RR opcode corresponding to the RI one. FIXME: It would be
// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
// is hard to get at.
switch (Opc) {
default: llvm_unreachable("Unexpected opcode!");
case PPC::STB: Opc = PPC::STBX; break;
case PPC::STH : Opc = PPC::STHX; break;
case PPC::STW : Opc = PPC::STWX; break;
case PPC::STB8: Opc = PPC::STBX8; break;
case PPC::STH8: Opc = PPC::STHX8; break;
case PPC::STW8: Opc = PPC::STWX8; break;
case PPC::STD: Opc = PPC::STDX; break;
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = PPC::STFDX; break;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
}
return true;
}
// Attempt to fast-select a store instruction.
bool PPCFastISel::SelectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
// FIXME: No atomic stores are supported.
if (cast<StoreInst>(I)->isAtomic())
return false;
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(Op0->getType(), VT))
return false;
// Get the value to be stored into a register.
SrcReg = getRegForValue(Op0);
if (SrcReg == 0)
return false;
// See if we can handle this address.
Address Addr;
if (!PPCComputeAddress(I->getOperand(1), Addr))
return false;
if (!PPCEmitStore(VT, SrcReg, Addr))
return false;
return true;
}
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *BrBB = FuncInfo.MBB;
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// For now, just try the simplest case where it's fed by a compare.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
if (!OptPPCPred)
return false;
PPC::Predicate PPCPred = OptPPCPred.getValue();
// Take advantage of fall-through opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
PPCPred = PPC::InvertPredicate(PPCPred);
}
unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
CondReg))
return false;
BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
FastEmitBranch(FBB, DL);
FuncInfo.MBB->addSuccessor(TBB);
return true;
} else if (const ConstantInt *CI =
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
FastEmitBranch(Target, DL);
return true;
}
// FIXME: ARM looks for a case where the block containing the compare
// has been split from the block containing the branch. If this happens,
// there is a vreg available containing the result of the compare. I'm
// not sure we can do much, as we've lost the predicate information with
// the compare instruction -- we have a 4-bit CR but don't know which bit
// to test here.
return false;
}
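// Fall-through example: for "br i1 %c, label %then, label %else" where
// %then immediately follows this block in layout, the swap above lets us
// emit a single conditional branch on the inverted predicate to %else and
// simply fall through into %then; FastEmitBranch typically omits branches
// to the layout successor.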
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
bool IsZExt, unsigned DestReg) {
Type *Ty = SrcValue1->getType();
EVT SrcEVT = TLI.getValueType(Ty, true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
// See if operand 2 is an immediate encodeable in the compare.
// FIXME: Operands are not in canonical order at -O0, so an immediate
// operand in position 1 is a lost opportunity for now. We are
// similar to ARM in this regard.
long Imm = 0;
bool UseImm = false;
// Only 16-bit integer constants can be represented in compares for
// PowerPC. Others will be materialized into a register.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
SrcVT == MVT::i8 || SrcVT == MVT::i1) {
const APInt &CIVal = ConstInt->getValue();
Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
UseImm = true;
}
}
unsigned CmpOpc;
bool NeedsExt = false;
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
CmpOpc = PPC::FCMPUS;
break;
case MVT::f64:
CmpOpc = PPC::FCMPUD;
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
// Intentional fall-through.
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
else
CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
break;
case MVT::i64:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
else
CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
break;
}
unsigned SrcReg1 = getRegForValue(SrcValue1);
if (SrcReg1 == 0)
return false;
unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(SrcValue2);
if (SrcReg2 == 0)
return false;
}
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg1 = ExtReg;
if (!UseImm) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg2 = ExtReg;
}
}
if (!UseImm)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addReg(SrcReg2);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addImm(Imm);
return true;
}
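// The extension above reflects that cmpw/cmplw (and their immediate forms)
// compare full 32-bit registers: i1/i8/i16 operands are first widened to
// i32, zero- or sign-extended to match the signedness of the comparison.
// E.g., an unsigned i8 compare becomes two zero-extends followed by CMPLW
// (or CMPLWI when the RHS is a suitable 16-bit immediate).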
// Attempt to fast-select a floating-point extend instruction.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
Value *Src = I->getOperand(0);
EVT SrcVT = TLI.getValueType(Src->getType(), true);
EVT DestVT = TLI.getValueType(I->getType(), true);
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
// No code is generated for a FP extend.
UpdateValueMap(I, SrcReg);
return true;
}
// Attempt to fast-select a floating-point truncate instruction.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
EVT SrcVT = TLI.getValueType(Src->getType(), true);
EVT DestVT = TLI.getValueType(I->getType(), true);
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
// Round the result to single precision.
unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
UpdateValueMap(I, DestReg);
return true;
}
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.08),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
bool IsSigned) {
// If necessary, extend 32-bit int to 64-bit.
if (SrcVT == MVT::i32) {
unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
return 0;
SrcReg = TmpReg;
}
// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
// Store the value from the GPR.
if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
return 0;
// Load the integer value into an FPR. The kind of load used depends
// on a number of conditions.
unsigned LoadOpc = PPC::LFD;
if (SrcVT == MVT::i32) {
- Addr.Offset = 4;
- if (!IsSigned)
+ if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- else if (PPCSubTarget.hasLFIWAX())
+ Addr.Offset = 4;
+ } else if (PPCSubTarget.hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
+ Addr.Offset = 4;
+ }
}
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
unsigned ResultReg = 0;
if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
return 0;
return ResultReg;
}
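// The round trip above stores the GPR value into an 8-byte stack slot and
// reloads it into an FPR. For an i32 source loaded with LFIWZX/LFIWAX, the
// +4 offset selects the low word of the stored doubleword on the
// big-endian 64-bit targets this fast-isel currently supports.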
// Attempt to fast-select an integer-to-floating-point conversion.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
if (!isTypeLegal(DstTy, DstVT))
return false;
if (DstVT != MVT::f32 && DstVT != MVT::f64)
return false;
Value *Src = I->getOperand(0);
EVT SrcEVT = TLI.getValueType(Src->getType(), true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
SrcVT != MVT::i32 && SrcVT != MVT::i64)
return false;
unsigned SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
// We can only lower an unsigned convert if we have the newer
// floating-point conversion operations.
if (!IsSigned && !PPCSubTarget.hasFPCVT())
return false;
// FIXME: For now we require the newer floating-point conversion operations
// (which are present only on P7 and A2 server models) when converting
// to single-precision float. Otherwise we have to generate a lot of
// fiddly code to avoid double rounding. If necessary, the fiddly code
// can be found in PPCTargetLowering::LowerINT_TO_FP().
if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
return false;
// Extend the input if necessary.
if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
return false;
SrcVT = MVT::i64;
SrcReg = TmpReg;
}
// Move the integer value to an FPR.
unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
if (FPReg == 0)
return false;
// Determine the opcode for the conversion.
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
unsigned DestReg = createResultReg(RC);
unsigned Opc;
if (DstVT == MVT::f32)
Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
else
Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
// Generate the convert.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addReg(FPReg);
UpdateValueMap(I, DestReg);
return true;
}
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.08),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
unsigned SrcReg, bool IsSigned) {
// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
// Note that if we have STFIWX available, we could use a 4-byte stack
// slot for i32, but this being fast-isel we'll just go with the
// easiest code gen possible.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
// Store the value from the FPR.
if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
return 0;
// Reload it into a GPR. If we want an i32, modify the address
// to have a 4-byte offset so we load from the right place.
if (VT == MVT::i32)
Addr.Offset = 4;
// Look at the currently assigned register for this instruction
// to determine the required register class.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
return 0;
return ResultReg;
}
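// Mirror image of PPCMoveToFPReg: the FPR is spilled to an 8-byte slot and
// reloaded into a GPR. For an i32 result the interesting word of the f64
// store sits at offset 4 on big-endian targets, hence the adjustment above.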
// Attempt to fast-select a floating-point-to-integer conversion.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
if (!isTypeLegal(DstTy, DstVT))
return false;
if (DstVT != MVT::i32 && DstVT != MVT::i64)
return false;
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
if (!isTypeLegal(SrcTy, SrcVT))
return false;
if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
return false;
unsigned SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
// Convert f32 to f64 if necessary. This is just a meaningless copy
// to get the register class right. COPY_TO_REGCLASS is needed since
// a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
if (InRC == &PPC::F4RCRegClass) {
unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
.addReg(SrcReg).addImm(PPC::F8RCRegClassID);
SrcReg = TmpReg;
}
// Determine the opcode for the conversion, which takes place
// entirely within FPRs.
unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
unsigned Opc;
if (DstVT == MVT::i32)
if (IsSigned)
Opc = PPC::FCTIWZ;
else
Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
else
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
// Generate the convert.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Now move the integer value from a float register to an integer register.
unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
if (IntReg == 0)
return false;
UpdateValueMap(I, IntReg);
return true;
}
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
EVT DestVT = TLI.getValueType(I->getType(), true);
// We can get here in the case when we have a binary operation on a non-legal
// type and the target independent selector doesn't know how to handle it.
if (DestVT != MVT::i16 && DestVT != MVT::i8)
return false;
// Look at the currently assigned register for this instruction
// to determine the required register class. If there is no register,
// make a conservative choice (don't assign R0).
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
&PPC::GPRC_and_GPRC_NOR0RegClass);
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
unsigned Opc;
switch (ISDOpcode) {
default: return false;
case ISD::ADD:
Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
break;
case ISD::OR:
Opc = IsGPRC ? PPC::OR : PPC::OR8;
break;
case ISD::SUB:
Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
break;
}
unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
unsigned SrcReg1 = getRegForValue(I->getOperand(0));
if (SrcReg1 == 0) return false;
// Handle case of small immediate operand.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
const APInt &CIVal = ConstInt->getValue();
int Imm = (int)CIVal.getSExtValue();
bool UseImm = true;
if (isInt<16>(Imm)) {
switch (Opc) {
default:
llvm_unreachable("Missing case!");
case PPC::ADD4:
Opc = PPC::ADDI;
MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
break;
case PPC::ADD8:
Opc = PPC::ADDI8;
MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
break;
case PPC::OR:
Opc = PPC::ORI;
break;
case PPC::OR8:
Opc = PPC::ORI8;
break;
case PPC::SUBF:
if (Imm == -32768)
UseImm = false;
else {
Opc = PPC::ADDI;
MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
Imm = -Imm;
}
break;
case PPC::SUBF8:
if (Imm == -32768)
UseImm = false;
else {
Opc = PPC::ADDI8;
MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
Imm = -Imm;
}
break;
}
if (UseImm) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addImm(Imm);
UpdateValueMap(I, ResultReg);
return true;
}
}
}
// Reg-reg case.
unsigned SrcReg2 = getRegForValue(I->getOperand(1));
if (SrcReg2 == 0) return false;
// Reverse operands for subtract-from.
if (ISDOpcode == ISD::SUB)
std::swap(SrcReg1, SrcReg2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
UpdateValueMap(I, ResultReg);
return true;
}
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Skip vector arguments for now, as well as long double and
// uint128_t, and anything that isn't passed in a register.
if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
!VA.isRegLoc() || VA.needsCustom())
return false;
// Skip bit-converted arguments for now.
if (VA.getLocInfo() == CCValAssign::BCvt)
return false;
}
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TII.getCallFrameSetupOpcode()))
.addImm(NumBytes);
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
// register.
unsigned NextGPR = PPC::X3;
unsigned NextFPR = PPC::F1;
// Process arguments.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Handle argument promotion and bitcasts.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
llvm_unreachable("Failed to emit a sext!");
ArgVT = DestVT;
Arg = TmpReg;
break;
}
case CCValAssign::AExt:
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
llvm_unreachable("Failed to emit a zext!");
ArgVT = DestVT;
Arg = TmpReg;
break;
}
case CCValAssign::BCvt: {
// FIXME: Not yet handled.
llvm_unreachable("Should have bailed before getting here!");
break;
}
}
// Copy this argument to the appropriate register.
unsigned ArgReg;
if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
ArgReg = NextFPR++;
++NextGPR;
} else
ArgReg = NextGPR++;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ArgReg).addReg(Arg);
RegArgs.push_back(ArgReg);
}
return true;
}
// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg) {
// Issue CALLSEQ_END.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TII.getCallFrameDestroyOpcode()))
.addImm(NumBytes).addImm(0);
// Next, generate a copy to obtain the return value.
// FIXME: No multi-register return values yet, though I don't foresee
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
CCValAssign &VA = RVLocs[0];
assert(VA.isRegLoc() && "Can only return in registers!");
MVT DestVT = VA.getValVT();
MVT CopyVT = DestVT;
// Ints smaller than a register still arrive in a full 64-bit
// register, so make sure we recognize this.
if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
CopyVT = MVT::i64;
unsigned SourcePhysReg = VA.getLocReg();
unsigned ResultReg = 0;
if (RetVT == CopyVT) {
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
ResultReg = createResultReg(CpyRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
ResultReg).addReg(SourcePhysReg);
// If only the low half of a general register is needed, generate
// a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
// used along the fast-isel path (not lowered), and downstream logic
// also doesn't like a direct subreg copy on a physical reg.)
} else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
ResultReg = createResultReg(&PPC::GPRCRegClass);
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
}
assert(ResultReg && "ResultReg unset!");
UsedRegs.push_back(SourcePhysReg);
UpdateValueMap(I, ResultReg);
}
}
// Attempt to fast-select a call instruction.
bool PPCFastISel::SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
// Can't handle inline asm.
if (isa<InlineAsm>(Callee))
return false;
// Allow SelectionDAG isel to handle tail calls.
if (CI->isTailCall())
return false;
// Obtain calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
bool IsVarArg = FTy->isVarArg();
// Not ready for varargs yet.
if (IsVarArg)
return false;
// Handle simple calls for now, with legal return types and
// those that can be extended.
Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
RetVT != MVT::f64) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
if (RVLocs.size() > 1)
return false;
}
// Bail early if more than 8 arguments, as we only currently
// handle arguments passed in registers.
unsigned NumArgs = CS.arg_size();
if (NumArgs > 8)
return false;
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(NumArgs);
ArgRegs.reserve(NumArgs);
ArgVTs.reserve(NumArgs);
ArgFlags.reserve(NumArgs);
for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
II != IE; ++II) {
// FIXME: ARM does something for intrinsic calls here, check into that.
unsigned AttrIdx = II - CS.arg_begin() + 1;
// Only handle easy calls for now. It would be reasonably easy
// to handle <= 8-byte structures passed ByVal in registers, but we
// have to ensure they are right-justified in the register.
if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
CS.paramHasAttr(AttrIdx, Attribute::ByVal))
return false;
ISD::ArgFlagsTy Flags;
if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
Flags.setSExt();
if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
Flags.setZExt();
Type *ArgTy = (*II)->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;
if (ArgVT.isVector())
return false;
unsigned Arg = getRegForValue(*II);
if (Arg == 0)
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
Args.push_back(*II);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Process the arguments.
SmallVector<unsigned, 8> RegArgs;
unsigned NumBytes;
if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
RegArgs, CC, NumBytes, IsVarArg))
return false;
// FIXME: No handling for function pointers yet. This requires
// implementing the function descriptor (OPD) setup.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
if (!GV)
return false;
// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
// Add implicit physical register uses to the call.
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
// Set all unused physregs defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
}
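// The NOP emitted after the BL8 is not dead code: for cross-module calls
// under the 64-bit SVR4 ABI the linker typically rewrites it into a TOC
// restore (ld r2, 40(r1)), which is why BL8_NOP is used here rather than
// a plain BL8.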
// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
// FIXME: Only one output register for now.
if (ValLocs.size() > 1)
return false;
// Special case for returning a constant integer of any size.
// Materialize the constant as an i64 and copy it to the return
// register. This avoids an unnecessary extend or truncate.
if (isa<ConstantInt>(*RV)) {
const Constant *C = cast<Constant>(RV);
unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
unsigned RetReg = ValLocs[0].getLocReg();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
RetReg).addReg(SrcReg);
RetRegs.push_back(RetReg);
} else {
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
// Copy the result values into the output registers.
for (unsigned i = 0; i < ValLocs.size(); ++i) {
CCValAssign &VA = ValLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
RetRegs.push_back(VA.getLocReg());
unsigned SrcReg = Reg + VA.getValNo();
EVT RVEVT = TLI.getValueType(RV->getType());
if (!RVEVT.isSimple())
return false;
MVT RVVT = RVEVT.getSimpleVT();
MVT DestVT = VA.getLocVT();
if (RVVT != DestVT && RVVT != MVT::i8 &&
RVVT != MVT::i16 && RVVT != MVT::i32)
return false;
if (RVVT != DestVT) {
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
llvm_unreachable("Full value assign but types don't match?");
case CCValAssign::AExt:
case CCValAssign::ZExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
return false;
SrcReg = TmpReg;
break;
}
case CCValAssign::SExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
return false;
SrcReg = TmpReg;
break;
}
}
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY), RetRegs[i])
.addReg(SrcReg);
}
}
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(PPC::BLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i64)
return false;
if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
return false;
// Signed extensions use EXTSB, EXTSH, EXTSW.
if (!IsZExt) {
unsigned Opc;
if (SrcVT == MVT::i8)
Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
else if (SrcVT == MVT::i16)
Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
else {
assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
Opc = PPC::EXTSW_32_64;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Unsigned 32-bit extensions use RLWINM.
} else if (DestVT == MVT::i32) {
unsigned MB;
if (SrcVT == MVT::i8)
MB = 24;
else {
assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
MB = 16;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
// Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
} else {
unsigned MB;
if (SrcVT == MVT::i8)
MB = 56;
else if (SrcVT == MVT::i16)
MB = 48;
else
MB = 32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(PPC::RLDICL_32_64), DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
}
return true;
}
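// Mask examples (PPC numbers bits from the MSB, bit 0): a zero-extend of
// i8 to i32 is RLWINM with SH=0, MB=24, ME=31, keeping only bits 24-31
// (the low byte); a zero-extend of i8 to i64 is RLDICL with SH=0, MB=56,
// clearing the 56 high-order bits.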
// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
unsigned AddrReg = getRegForValue(I->getOperand(0));
if (AddrReg == 0)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
.addReg(AddrReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
return true;
}
// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
EVT SrcVT = TLI.getValueType(Src->getType(), true);
EVT DestVT = TLI.getValueType(I->getType(), true);
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
return false;
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return false;
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
// The only interesting case is when we need to switch register classes.
if (SrcVT == MVT::i64) {
unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ResultReg).addReg(SrcReg, 0, PPC::sub_32);
SrcReg = ResultReg;
}
UpdateValueMap(I, SrcReg);
return true;
}
// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
Type *DestTy = I->getType();
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
bool IsZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
SrcEVT = TLI.getValueType(SrcTy, true);
DestEVT = TLI.getValueType(DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
// If we know the register class needed for the result of this
// instruction, use it. Otherwise pick the register class of the
// correct size that does not contain X0/R0, since we don't know
// whether downstream uses permit that assignment.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass));
unsigned ResultReg = createResultReg(RC);
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
UpdateValueMap(I, ResultReg);
return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
return SelectLoad(I);
case Instruction::Store:
return SelectStore(I);
case Instruction::Br:
return SelectBranch(I);
case Instruction::IndirectBr:
return SelectIndirectBr(I);
case Instruction::FPExt:
return SelectFPExt(I);
case Instruction::FPTrunc:
return SelectFPTrunc(I);
case Instruction::SIToFP:
return SelectIToFP(I, /*IsSigned*/ true);
case Instruction::UIToFP:
return SelectIToFP(I, /*IsSigned*/ false);
case Instruction::FPToSI:
return SelectFPToI(I, /*IsSigned*/ true);
case Instruction::FPToUI:
return SelectFPToI(I, /*IsSigned*/ false);
case Instruction::Add:
return SelectBinaryIntOp(I, ISD::ADD);
case Instruction::Or:
return SelectBinaryIntOp(I, ISD::OR);
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
if (isa<IntrinsicInst>(I))
return false;
return SelectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
// Here add other flavors of Instruction::XXX that automated
// cases don't catch. For example, switches are terminators
// that aren't yet handled.
default:
break;
}
return false;
}
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// No plans to handle long double here.
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
// All FP constants are loaded from the constant pool.
unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
assert(Align > 0 && "Unexpectedly missing alignment information!");
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
CodeModel::Model CModel = TM.getCodeModel();
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
(VT == MVT::f32) ? 4 : 8, Align);
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
TmpReg)
.addConstantPoolIndex(Idx).addReg(PPC::X2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
if (CModel == CodeModel::Large) {
unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg2);
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
.addReg(TmpReg)
.addMemOperand(MMO);
}
return DestReg;
}
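// Summary of the three shapes emitted above, using LFD for f64:
//   small/JIT: tmp = LDtocCPT(Idx, X2);             lfd f, 0(tmp)
//   medium:    tmp = ADDIStocHA(X2, Idx);           lfd f, Idx@toc@l(tmp)
//   large:     tmp2 = LDtocL(Idx, ADDIStocHA(...)); lfd f, 0(tmp2)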
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
assert(VT == MVT::i64 && "Non-address!");
const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
unsigned DestReg = createResultReg(RC);
// Global values may be plain old object addresses, TLS object
// addresses, constant pool entries, or jump tables. How we generate
// code for these may depend on small, medium, or large code model.
CodeModel::Model CModel = TM.getCodeModel();
// FIXME: Jump tables are not yet required because fast-isel doesn't
// handle switches; if that changes, we need them as well. For now,
// what follows assumes everything's a generic (or TLS) global address.
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
// If GV is an alias, use the aliasee for determining thread-locality.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
}
// FIXME: We don't yet handle the complexity of TLS.
bool IsTLS = GVar && GVar->isThreadLocal();
if (IsTLS)
return 0;
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
.addGlobalAddress(GV).addReg(PPC::X2);
else {
// If the address is an externally defined symbol, a symbol with
// common or externally available linkage, a function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
// LDtocL(GV, ADDIStocHA(%X2, GV))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, GV), GV)
// Either way, start with the ADDIStocHA:
unsigned HighPartReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
// !GVar implies a function address. An external variable is one
// without an initializer.
// If/when switches are implemented, jump tables should be handled
// on the "if" path here.
if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
else
// Otherwise generate the ADDItocL.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
DestReg).addReg(HighPartReg).addGlobalAddress(GV);
}
return DestReg;
}
// Materialize a 32-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC) {
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
unsigned ResultReg = createResultReg(RC);
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
if (isInt<16>(Imm))
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
.addImm(Imm);
else if (Lo) {
// Both Lo and Hi have nonzero bits.
unsigned TmpReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
.addImm(Hi);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
.addReg(TmpReg).addImm(Lo);
} else
// Just Hi bits.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
.addImm(Hi);
return ResultReg;
}
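// For example, 0x12345678 does not fit in a signed 16-bit immediate and
// has nonzero low bits, so it is built as:
//   lis rD, 0x1234   ; rD = 0x12340000
//   ori rD, rD, 0x5678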
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC) {
unsigned Remainder = 0;
unsigned Shift = 0;
// If the value doesn't fit in 32 bits, see if we can shift it
// so that it fits in 32 bits.
if (!isInt<32>(Imm)) {
Shift = countTrailingZeros<uint64_t>(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
if (isInt<32>(ImmSh))
Imm = ImmSh;
else {
Remainder = Imm;
Shift = 32;
Imm >>= 32;
}
}
// Handle the high-order 32 bits (if shifted) or the whole 32 bits
// (if not shifted).
unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
if (!Shift)
return TmpReg1;
// If upper 32 bits were not zero, we've built them and need to shift
// them into place.
unsigned TmpReg2;
if (Imm) {
TmpReg2 = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR),
TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
} else
TmpReg2 = TmpReg1;
unsigned TmpReg3, Hi, Lo;
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
TmpReg3 = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8),
TmpReg3).addReg(TmpReg2).addImm(Hi);
} else
TmpReg3 = TmpReg2;
if ((Lo = Remainder & 0xFFFF)) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8),
ResultReg).addReg(TmpReg3).addImm(Lo);
return ResultReg;
}
return TmpReg3;
}
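// Worked example: materializing 0x123456789ABCDEF0. It does not fit in
// 32 bits even after shifting out the 4 trailing zeros, so Remainder
// becomes 0x9ABCDEF0, Shift becomes 32, and the high half 0x12345678 is
// built with LIS/ORI as above. Roughly:
//   lis    rD, 0x1234
//   ori    rD, rD, 0x5678
//   rldicr rD, rD, 32, 31   ; shift the high half into place
//   oris   rD, rD, 0x9ABC
//   ori    rD, rD, 0xDEF0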
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
VT != MVT::i8 && VT != MVT::i1)
return 0;
const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
&PPC::GPRCRegClass);
// If the constant is in range, use a load-immediate.
const ConstantInt *CI = cast<ConstantInt>(C);
if (isInt<16>(CI->getSExtValue())) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg)
.addImm(CI->getSExtValue());
return ImmReg;
}
// Construct the constant piecewise.
int64_t Imm = CI->getZExtValue();
if (VT == MVT::i64)
return PPCMaterialize64BitInt(Imm, RC);
else if (VT == MVT::i32)
return PPCMaterialize32BitInt(Imm, RC);
return 0;
}
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple()) return 0;
MVT VT = CEVT.getSimpleVT();
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return PPCMaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return PPCMaterializeGV(GV, VT);
else if (isa<ConstantInt>(C))
return PPCMaterializeInt(C, VT);
return 0;
}
// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
MVT VT;
if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(SI->second).addImm(0);
return ResultReg;
}
return 0;
}
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(LI->getType(), VT))
return false;
// Combine load followed by zero- or sign-extend.
bool IsZExt = false;
switch(MI->getOpcode()) {
default:
return false;
case PPC::RLDICL:
case PPC::RLDICL_32_64: {
IsZExt = true;
unsigned MB = MI->getOperand(3).getImm();
if ((VT == MVT::i8 && MB <= 56) ||
(VT == MVT::i16 && MB <= 48) ||
(VT == MVT::i32 && MB <= 32))
break;
return false;
}
case PPC::RLWINM:
case PPC::RLWINM8: {
IsZExt = true;
unsigned MB = MI->getOperand(3).getImm();
if ((VT == MVT::i8 && MB <= 24) ||
(VT == MVT::i16 && MB <= 16))
break;
return false;
}
case PPC::EXTSB:
case PPC::EXTSB8:
case PPC::EXTSB8_32_64:
/* There is no sign-extending load-byte instruction. */
return false;
case PPC::EXTSH:
case PPC::EXTSH8:
case PPC::EXTSH8_32_64: {
if (VT != MVT::i16 && VT != MVT::i8)
return false;
break;
}
case PPC::EXTSW:
case PPC::EXTSW_32_64: {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
break;
}
}
// See if we can handle this address.
Address Addr;
if (!PPCComputeAddress(LI->getOperand(0), Addr))
return false;
unsigned ResultReg = MI->getOperand(0).getReg();
if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
return false;
MI->eraseFromParent();
return true;
}
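// Example of the fold: for "%z = zext i8 %b to i64" where %b is a load,
// the extend was selected as an RLDICL with MB <= 56. Since LBZ already
// zero-extends the loaded byte, the load is re-emitted with the extend's
// destination register and the now-redundant RLDICL is erased.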
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
bool PPCFastISel::FastLowerArguments() {
// Defer to normal argument lowering for now. It's reasonably
// efficient. Consider doing something like ARM to handle the
// case where all args fit in registers, no varargs, no float
// or vector args.
return false;
}
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
VT != MVT::i8 && VT != MVT::i1)
return 0;
const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
&PPC::GPRCRegClass);
if (VT == MVT::i64)
return PPCMaterialize64BitInt(Imm, RC);
else
return PPCMaterialize32BitInt(Imm, RC);
}
// Override for ADDI and ADDI8 to set the correct register class
// on RHS operand 0. The automatic infrastructure naively assumes
// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
// for these cases. At the moment, none of the other automatically
// generated RI instructions require special treatment. However, once
// SelectSelect is implemented, "isel" requires similar handling.
//
// Also be conservative about the output register class. Avoid
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
if (MachineInstOpcode == PPC::ADDI)
MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
else if (MachineInstOpcode == PPC::ADDI8)
MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
Op0, Op0IsKill, Imm);
}
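// Background for the NOR0/NOX0 gymnastics: in instructions like
//   addi rD, rA, imm
// an rA operand of r0 (or x0 in 64-bit mode) encodes the literal value 0,
// not the contents of the register, so operands and conservatively chosen
// results are kept out of R0/X0.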
// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
}
// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
Op1, Op1IsKill);
}
namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
const TargetMachine &TM = FuncInfo.MF->getTarget();
// Only available on 64-bit ELF for now.
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
return new PPCFastISel(FuncInfo, LibInfo);
return 0;
}
}
Index: head/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (revision 265925)
@@ -1,1560 +1,1560 @@
//===-- PPCISelDAGToDAG.cpp - PPC -- pattern matching inst selector -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
const PPCTargetLowering &PPCLowering;
const PPCSubtarget &PPCSubTarget;
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
PPCSubTarget(*TM.getSubtargetImpl()) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
SelectionDAGISel::runOnMachineFunction(MF);
if (!PPCSubTarget.isSVR4ABI())
InsertVRSaveCode(MF);
return true;
}
virtual void PostprocessISelDAG();
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
/// getI64Imm - Return a target constant with the specified value, of type
/// i64.
inline SDValue getI64Imm(uint64_t Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i64);
}
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
}
/// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
/// with any number of 0s on either side. The 1s are allowed to wrap from
/// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
/// 0x0F0F0000 is not, since all 1s are not contiguous.
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME);
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *Select(SDNode *N);
SDNode *SelectBitfieldInsert(SDNode *N);
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
/// result of a prior SelectAddressRegImm call.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
if (N.getOpcode() == ISD::TargetConstant ||
N.getOpcode() == ISD::TargetGlobalAddress) {
Out = N;
return true;
}
return false;
}
/// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which is preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
}
/// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
/// SelectAddrImmX4 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
/// Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
}
// Select an address into a single register.
bool SelectAddr(SDValue N, SDValue &Base) {
Base = N;
return true;
}
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps) {
OutOps.push_back(Op);
return false;
}
void InsertVRSaveCode(MachineFunction &MF);
virtual const char *getPassName() const {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"
private:
SDNode *SelectSETCC(SDNode *N);
};
}
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Check to see if this function uses vector registers, which means we have to
// save and restore the VRSAVE register and update it with the regs we use.
//
// In this case, there will be virtual registers of vector type created
// by the scheduler. Detect them now.
bool HasVectorVReg = false;
for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
HasVectorVReg = true;
break;
}
}
if (!HasVectorVReg) return; // nothing to do.
// If we have a vector register, we want to emit code into the entry and exit
// blocks to save and restore the VRSAVE register. We do this here (instead
// of marking all vector instructions as clobbering VRSAVE) for two reasons:
//
// 1. This (trivially) reduces the load on the register allocator, by not
// having to represent the live range of the VRSAVE register.
// 2. This (more significantly) allows us to create a temporary virtual
// register to hold the saved VRSAVE value, allowing this temporary to be
// register allocated, instead of forcing it to be spilled to the stack.
// Create two vregs - one to hold the VRSAVE register that is live-in to the
// function and one for the value after having bits or'd into it.
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
const TargetInstrInfo &TII = *TM.getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
// InVRSAVE = MFVRSAVE
// UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
// MTVRSAVE UpdatedVRSAVE
MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
UpdatedVRSAVE).addReg(InVRSAVE);
BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
if (!BB->empty() && BB->back().isReturn()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
while (I2 != BB->begin() && (--I2)->isTerminator())
IP = I2;
// Emit: MTVRSAVE InVRSave
BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
}
}
}
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
const TargetInstrInfo &TII = *TM.getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc dl;
if (PPCLowering.getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
}
return CurDAG->getRegister(GlobalBaseReg,
PPCLowering.getPointerTy()).getNode();
}
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
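/// For example, the i32 constant 0xFFFF8000 is the sign extension of -32768,
/// so this returns true with Imm = -32768, while 0x00008000 (+32768) does not
/// fit in a signed 16-bit immediate and returns false.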
static bool isIntS16Immediate(SDNode *N, short &Imm) {
if (N->getOpcode() != ISD::Constant)
return false;
Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
if (N->getValueType(0) == MVT::i32)
return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
else
return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
Imm = cast<ConstantSDNode>(N)->getZExtValue();
return true;
}
return false;
}
/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
Imm = cast<ConstantSDNode>(N)->getZExtValue();
return true;
}
return false;
}
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
return isInt32Immediate(N.getNode(), Imm);
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
return N->getOpcode() == Opc
&& isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
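// Worked examples, with bits numbered from the MSB (PPC convention):
//   0x0FF00000 -> true,  MB = 4,  ME = 11 (one contiguous run of eight 1s)
//   0xFF0000FF -> true,  MB = 24, ME = 7  (the run wraps from LSB to MSB)
//   0x0F0F0000 -> false (two separate runs of 1s)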
if (!Val)
return false;
if (isShiftedMask_32(Val)) {
// look for the first non-zero bit
MB = countLeadingZeros(Val);
// look for the first zero bit after the run of ones
ME = countLeadingZeros((Val - 1) ^ Val);
return true;
} else {
Val = ~Val; // invert mask
if (isShiftedMask_32(Val)) {
// effectively look for the first zero bit
ME = countLeadingZeros(Val) - 1;
// effectively look for the first one bit after the run of zeros
MB = countLeadingZeros((Val - 1) ^ Val) + 1;
return true;
}
}
// no run present
return false;
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
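// Worked example: for (srl x, 16) under the mask 0x0000FFFF, the shift makes
// bits 0..15 indeterminant, the rotate amount becomes 32 - 16 = 16, and the
// mask avoids all indeterminant bits, so this returns SH = 16, MB = 16,
// ME = 31, i.e. rlwinm x, 16, 16, 31.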
// Don't even go down this path for i64, since different logic will be
// necessary for rldicl/rldicr/rldimi.
if (N->getValueType(0) != MVT::i32)
return false;
unsigned Shift = 32;
unsigned Indeterminant = ~0; // bit mask marking indeterminant results
unsigned Opcode = N->getOpcode();
if (N->getNumOperands() != 2 ||
!isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
return false;
if (Opcode == ISD::SHL) {
// apply shift left to mask if it comes first
if (isShiftMask) Mask = Mask << Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu << Shift);
} else if (Opcode == ISD::SRL) {
// apply shift right to mask if it comes first
if (isShiftMask) Mask = Mask >> Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu >> Shift);
// adjust for the left rotate
Shift = 32 - Shift;
} else if (Opcode == ISD::ROTL) {
Indeterminant = 0;
} else {
return false;
}
// if the mask doesn't intersect any Indeterminant bits
if (Mask && !(Mask & Indeterminant)) {
SH = Shift & 31;
// make sure the mask is still a mask (wrap arounds may not be)
return isRunOfOnes(Mask, MB, ME);
}
return false;
}
/// SelectBitfieldInsert - turn an or of two masked values into
/// the rotate left word immediate then mask insert (rlwimi) instruction.
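/// For example, (or (and x, 0xFFFF0000), (and y, 0x0000FFFF)) has known-zero
/// masks that cover all 32 bits; InsertMask = 0x0000FFFF is a run with
/// MB = 16 and ME = 31, so we emit an rlwimi that inserts bits 16..31 of the
/// right-hand value into x.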
SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
APInt LKZ, LKO, RKZ, RKO;
CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
unsigned TargetMask = LKZ.getZExtValue();
unsigned InsertMask = RKZ.getZExtValue();
if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
unsigned Op0Opc = Op0.getOpcode();
unsigned Op1Opc = Op1.getOpcode();
unsigned Value, SH = 0;
TargetMask = ~TargetMask;
InsertMask = ~InsertMask;
// If the LHS has a foldable shift and the RHS does not, then swap it to the
// RHS so that we can fold the shift into the insert.
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
Op0.getOperand(0).getOpcode() == ISD::SRL) {
if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
Op1.getOperand(0).getOpcode() != ISD::SRL) {
std::swap(Op0, Op1);
std::swap(Op0Opc, Op1Opc);
std::swap(TargetMask, InsertMask);
}
}
} else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
Op1.getOperand(0).getOpcode() != ISD::SRL) {
std::swap(Op0, Op1);
std::swap(Op0Opc, Op1Opc);
std::swap(TargetMask, InsertMask);
}
}
unsigned MB, ME;
if (isRunOfOnes(InsertMask, MB, ME)) {
SDValue Tmp1, Tmp2;
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(1), Value)) {
Op1 = Op1.getOperand(0);
SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
}
if (Op1Opc == ISD::AND) {
unsigned SHOpc = Op1.getOperand(0).getOpcode();
if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
// Note that Value must be in range here (less than 32) because
// otherwise there would not be any bits set in InsertMask.
Op1 = Op1.getOperand(0).getOperand(0);
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
}
}
SH &= 31;
SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
getI32Imm(ME) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
return 0;
}
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDLoc dl) {
// Always select the LHS.
unsigned Opc;
if (LHS.getValueType() == MVT::i32) {
unsigned Imm;
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
if (isInt32Immediate(RHS, Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt<16>((int)Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
// lis r2, 4660
// ori r2, r2, 22136
// cmpw cr0, r3, r2
// Since we are just comparing for equality, we can emit this instead:
// xoris r0,r3,0x1234
// cmplwi cr0,r0,0x5678
// beq cr0,L6
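// For example, testing r3 == 0x12345678: the xoris zeroes the high halfword
// exactly when it matches 0x1234, so the unsigned 16-bit compare against
// 0x5678 completes the equality test.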
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
getI32Imm(Imm >> 16)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
getI32Imm(Imm & 0xFFFF)), 0);
}
Opc = PPC::CMPLW;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLW;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm((int)SImm & 0xFFFF)),
0);
Opc = PPC::CMPW;
}
} else if (LHS.getValueType() == MVT::i64) {
uint64_t Imm;
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
if (isInt64Immediate(RHS.getNode(), Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
// lis r2, 4660
// ori r2, r2, 22136
// cmpd cr0, r3, r2
// Since we are just comparing for equality, we can emit this instead:
// xoris r0,r3,0x1234
// cmpldi cr0,r0,0x5678
// beq cr0,L6
if (isUInt<32>(Imm)) {
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
getI64Imm(Imm >> 16)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
getI64Imm(Imm & 0xFFFF)), 0);
}
}
Opc = PPC::CMPLD;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
getI64Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLD;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI64Imm(SImm & 0xFFFF)),
0);
Opc = PPC::CMPD;
}
} else if (LHS.getValueType() == MVT::f32) {
Opc = PPC::FCMPUS;
} else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
Opc = PPC::FCMPUD;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
switch (CC) {
case ISD::SETUEQ:
case ISD::SETONE:
case ISD::SETOLE:
case ISD::SETOGE:
llvm_unreachable("Should be lowered by legalize!");
default: llvm_unreachable("Unknown condition!");
case ISD::SETOEQ:
case ISD::SETEQ: return PPC::PRED_EQ;
case ISD::SETUNE:
case ISD::SETNE: return PPC::PRED_NE;
case ISD::SETOLT:
case ISD::SETLT: return PPC::PRED_LT;
case ISD::SETULE:
case ISD::SETLE: return PPC::PRED_LE;
case ISD::SETOGT:
case ISD::SETGT: return PPC::PRED_GT;
case ISD::SETUGE:
case ISD::SETGE: return PPC::PRED_GE;
case ISD::SETO: return PPC::PRED_NU;
case ISD::SETUO: return PPC::PRED_UN;
// These two are invalid for floating point. Assume we have int.
case ISD::SETULT: return PPC::PRED_LT;
case ISD::SETUGT: return PPC::PRED_GT;
}
}
/// getCRIdxForSetCC - Return the index of the condition register field
/// associated with the SetCC condition, and whether or not the field is
/// treated as inverted. That is, lt = 0; ge = 0 inverted.
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
Invert = false;
switch (CC) {
default: llvm_unreachable("Unknown condition!");
case ISD::SETOLT:
case ISD::SETLT: return 0; // Bit #0 = SETOLT
case ISD::SETOGT:
case ISD::SETGT: return 1; // Bit #1 = SETOGT
case ISD::SETOEQ:
case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
case ISD::SETUO: return 3; // Bit #3 = SETUO
case ISD::SETUGE:
case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
case ISD::SETULE:
case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
case ISD::SETUNE:
case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
case ISD::SETUEQ:
case ISD::SETOGE:
case ISD::SETOLE:
case ISD::SETONE:
llvm_unreachable("Invalid branch code: should be expanded by legalize");
// These are invalid for floating point. Assume integer.
case ISD::SETULT: return 0;
case ISD::SETUGT: return 1;
}
}
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
switch (CC) {
case ISD::SETEQ:
case ISD::SETUEQ:
case ISD::SETNE:
case ISD::SETUNE:
if (VecVT == MVT::v16i8)
return PPC::VCMPEQUB;
else if (VecVT == MVT::v8i16)
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
// v4f32 != v4f32 could be translated to unordered not equal
else if (VecVT == MVT::v4f32)
return PPC::VCMPEQFP;
break;
case ISD::SETLT:
case ISD::SETGT:
case ISD::SETLE:
case ISD::SETGE:
if (VecVT == MVT::v16i8)
return PPC::VCMPGTSB;
else if (VecVT == MVT::v8i16)
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
else if (VecVT == MVT::v4f32)
return PPC::VCMPGTFP;
break;
case ISD::SETULT:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULE:
if (VecVT == MVT::v16i8)
return PPC::VCMPGTUB;
else if (VecVT == MVT::v8i16)
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
break;
case ISD::SETOEQ:
if (VecVT == MVT::v4f32)
return PPC::VCMPEQFP;
break;
case ISD::SETOLT:
case ISD::SETOGT:
case ISD::SETOLE:
if (VecVT == MVT::v4f32)
return PPC::VCMPGTFP;
break;
case ISD::SETOGE:
if (VecVT == MVT::v4f32)
return PPC::VCMPGEFP;
break;
default:
break;
}
llvm_unreachable("Invalid integer vector compare condition");
}
// getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32).
static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
switch (VecVT) {
case MVT::v16i8:
return PPC::VCMPEQUB;
case MVT::v8i16:
return PPC::VCMPEQUH;
case MVT::v4i32:
return PPC::VCMPEQUW;
case MVT::v4f32:
return PPC::VCMPEQFP;
default:
llvm_unreachable("Invalid integer vector compare condition");
}
}
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
if (isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
if (Imm == 0) {
SDValue Op = N->getOperand(0);
switch (CC) {
default: break;
case ISD::SETEQ: {
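// cntlzw yields 32 (0b100000) only when Op is zero, so the rlwinm below
// (rotate left 27, keep bits 5..31, i.e. a logical shift right by 5)
// produces 1 for Op == 0 and 0 otherwise.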
Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETNE: {
if (isPPC64) break;
SDValue AD =
SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U)), 0);
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
AD.getValue(1));
}
case ISD::SETLT: {
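// Op < 0 exactly when the sign bit is set; rotating left by 1 moves bit 0
// into bit 31 and the 31..31 mask isolates it.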
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETGT: {
SDValue T =
SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
}
} else if (Imm == ~0U) { // setcc op, -1
SDValue Op = N->getOperand(0);
switch (CC) {
default: break;
case ISD::SETEQ:
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(1)), 0);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(CurDAG->getMachineNode(PPC::LI, dl,
MVT::i32,
getI32Imm(0)), 0),
Op.getValue(1));
case ISD::SETNE: {
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
Op, SDValue(AD, 1));
}
case ISD::SETLT: {
SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
getI32Imm(1)), 0);
SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
Op), 0);
SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
getI32Imm(1));
}
}
}
}
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Altivec vector compare instructions do not set any CR register by default,
// and vector compare operations return the same type as their operands.
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
unsigned int VCmpInst = getVCmpInst(VT, CC);
switch (CC) {
case ISD::SETEQ:
case ISD::SETOEQ:
case ISD::SETUEQ:
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
case ISD::SETNE:
case ISD::SETONE:
case ISD::SETUNE: {
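// There is no direct vector not-equal compare, so emit the equality
// compare and invert it with vnor, which computes a bitwise NOT when
// both source operands are the same register.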
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp);
}
case ISD::SETLT:
case ISD::SETOLT:
case ISD::SETULT:
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
case ISD::SETGT:
case ISD::SETOGT:
case ISD::SETUGT:
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
case ISD::SETGE:
case ISD::SETOGE:
case ISD::SETUGE: {
// Small optimization: Altivec provides a 'Vector Compare Greater Than
// or Equal To' instruction (vcmpgefp), so in this case there is no
// need for extra logic for the equal compare.
if (VecVT.getSimpleVT().isFloatingPoint()) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
unsigned int VCmpEQInst = getVCmpEQInst(VT);
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
}
}
case ISD::SETLE:
case ISD::SETOLE:
case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
unsigned int VCmpEQInst = getVCmpEQInst(VT);
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
}
default:
llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
}
}
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
SDValue IntCR;
// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
SDValue InFlag(0, 0); // Null incoming flag value.
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
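// MFOCRF leaves the four CR7 bits in bits 28..31 of the result (LT, GT,
// EQ, SO in order), so bit 28+Idx holds the tested condition; the rlwinm
// below rotates that bit into position 31 and masks everything else off.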
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
if (!Inv)
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
// Get the specified bit.
SDValue Tmp =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
}
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return NULL; // Already selected.
}
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
if (N->getValueType(0) == MVT::i64) {
// Get 64 bit value.
int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
// Assume no remaining bits.
unsigned Remainder = 0;
// Assume no shift required.
unsigned Shift = 0;
// If it can't be represented as a 32 bit value.
if (!isInt<32>(Imm)) {
Shift = countTrailingZeros<uint64_t>(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
// If the shifted value fits 32 bits.
if (isInt<32>(ImmSh)) {
// Go with the shifted value.
Imm = ImmSh;
} else {
// Still stuck with a 64 bit value.
Remainder = Imm;
Shift = 32;
Imm >>= 32;
}
}
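// For example, materializing 0x12345678ABCD0000: the value still does not
// fit in 32 bits after shifting out the 16 trailing zeros, so we keep the
// full value in Remainder, set Imm = 0x12345678, and emit:
//   lis r, 0x1234 ; ori r, r, 0x5678   (high word)
//   rldicr r, r, 32, 31                (shift it into place)
//   oris r, r, 0xABCD                  (low word; its ori half is zero and
//                                       therefore skipped)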
// Intermediate operand.
SDNode *Result;
// Handle first 32 bits.
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
// Simple value.
if (isInt<16>(Imm)) {
// Just the Lo bits.
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
} else if (Lo) {
// Handle the Hi bits.
unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
// And Lo bits.
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
SDValue(Result, 0), getI32Imm(Lo));
} else {
// Just the Hi bits.
Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
}
// If no shift, we're done.
if (!Shift) return Result;
// Shift for next step if the upper 32-bits were not zero.
if (Imm) {
Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
SDValue(Result, 0),
getI32Imm(Shift),
getI32Imm(63 - Shift));
}
// Add in the last bits as required.
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
SDValue(Result, 0), getI32Imm(Hi));
}
if ((Lo = Remainder & 0xFFFF)) {
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
SDValue(Result, 0), getI32Imm(Lo));
}
return Result;
}
break;
}
case ISD::SETCC:
return SelectSETCC(N);
case PPCISD::GlobalBaseReg:
return getGlobalBaseReg();
case ISD::FrameIndex: {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
if (N->hasOneUse())
return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
getSmallIPtrImm(0));
return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
getSmallIPtrImm(0));
}
case PPCISD::MFOCRF: {
SDValue InFlag = N->getOperand(1);
return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
N->getOperand(0), InFlag);
}
case ISD::SDIV: {
// FIXME: since this depends on the setting of the carry flag from the srawi
// we should really be making notes about that for the scheduler.
// FIXME: It sure would be nice if we could cheaply recognize the
// srl/add/sra pattern the dag combiner will generate for this as
// sra/addze rather than having to handle sdiv ourselves. oh well.
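// For a positive power-of-2 divisor, srawi computes the arithmetic shift
// and sets CA when the dividend was negative with a non-zero remainder;
// addze then adds the carry, matching C's truncated division (e.g. -7 / 4:
// srawi gives -2 with CA = 1, and addze corrects it to -1).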
unsigned Imm;
if (isInt32Immediate(N->getOperand(1), Imm)) {
SDValue N0 = N->getOperand(0);
if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, getI32Imm(Log2_32(Imm)));
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(Op, 0), SDValue(Op, 1));
} else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, getI32Imm(Log2_32(-Imm)));
SDValue PT =
SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
SDValue(Op, 0), SDValue(Op, 1)),
0);
return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
}
}
// Other cases are autogenerated.
break;
}
case ISD::LOAD: {
// Handle preincrement loads.
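// Update-form loads (lwzu, lbzu, ...) return both the loaded value and the
// incremented base address, which is why the machine nodes built below
// carry a second, pointer-typed result.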
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC)
break;
SDValue Offset = LD->getOffset();
if (Offset.getOpcode() == ISD::TargetConstant ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
if (LD->getValueType(0) != MVT::i64) {
// Handle PPC32 integer and normal FP loads.
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::f64: Opcode = PPC::LFDU; break;
case MVT::f32: Opcode = PPC::LFSU; break;
case MVT::i32: Opcode = PPC::LWZU; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZU; break;
}
} else {
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::i64: Opcode = PPC::LDU; break;
case MVT::i32: Opcode = PPC::LWZU8; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZU8; break;
}
}
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops);
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
if (LD->getValueType(0) != MVT::i64) {
// Handle PPC32 integer and normal FP loads.
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZUX; break;
}
} else {
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
"Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::i64: Opcode = PPC::LDUX; break;
case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZUX8; break;
}
}
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops);
}
}
case ISD::AND: {
unsigned Imm, Imm2, SH, MB, ME;
uint64_t Imm64;
// If this is an and of a value rotated between 0 and 31 bits and then and'd
// with a mask, emit rlwinm
if (isInt32Immediate(N->getOperand(1), Imm) &&
isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
SDValue Val = N->getOperand(0).getOperand(0);
SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
// If this is just a masked value where the input is not handled above, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
if (isInt32Immediate(N->getOperand(1), Imm) &&
isRunOfOnes(Imm, MB, ME) &&
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDValue Val = N->getOperand(0);
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
// If this is a 64-bit zero-extension mask, emit rldicl.
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
isMask_64(Imm64)) {
SDValue Val = N->getOperand(0);
MB = 64 - CountTrailingOnes_64(Imm64);
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
}
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
return NULL;
}
// ISD::OR doesn't get all the bitfield insertion fun.
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
if (isInt32Immediate(N->getOperand(1), Imm) &&
N->getOperand(0).getOpcode() == ISD::OR &&
isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
unsigned MB, ME;
Imm = ~(Imm^Imm2);
if (isRunOfOnes(Imm, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
N->getOperand(0).getOperand(1),
getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
// Other cases are autogenerated.
break;
}
case ISD::OR:
if (N->getValueType(0) == MVT::i32)
if (SDNode *I = SelectBitfieldInsert(N))
return I;
// Other cases are autogenerated.
break;
case ISD::SHL: {
unsigned Imm, SH, MB, ME;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
// Other cases are autogenerated.
break;
}
case ISD::SRL: {
unsigned Imm, SH, MB, ME;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
// Other cases are autogenerated.
break;
}
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
if (N1C->isNullValue() && N3C->isNullValue() &&
N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
// FIXME: Implement this optzn for PPC64.
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
N->getOperand(0), getI32Imm(~0U));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
SDValue(Tmp, 0), N->getOperand(0),
SDValue(Tmp, 1));
}
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
unsigned BROpc = getPredicateForSetCC(CC);
unsigned SelectCCOp;
if (N->getValueType(0) == MVT::i32)
SelectCCOp = PPC::SELECT_CC_I4;
else if (N->getValueType(0) == MVT::i64)
SelectCCOp = PPC::SELECT_CC_I8;
else if (N->getValueType(0) == MVT::f32)
SelectCCOp = PPC::SELECT_CC_F4;
else if (N->getValueType(0) == MVT::f64)
SelectCCOp = PPC::SELECT_CC_F8;
else
SelectCCOp = PPC::SELECT_CC_VRRC;
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
getI32Imm(BROpc) };
return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
}
case PPCISD::BDNZ:
case PPCISD::BDZ: {
bool IsPPC64 = PPCSubTarget.isPPC64();
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
(IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
MVT::Other, Ops, 2);
}
case PPCISD::COND_BRANCH: {
// Op #0 is the Chain.
// Op #1 is the PPC::PRED_* number.
// Op #2 is the CR#
// Op #3 is the Dest MBB
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
SDValue Pred =
getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
}
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
N->getOperand(4), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
}
case ISD::BRIND: {
// FIXME: Should custom lower this.
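// Indirect branches go through the count register: mtctr loads the target
// address and bctr branches to it.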
SDValue Chain = N->getOperand(0);
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
case PPCISD::TOC_ENTRY: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
// selecting one of LDtoc, LDtocJTI, and LDtocCPT.
CodeModel::Model CModel = TM.getCodeModel();
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
// The first source operand is a TargetGlobalAddress or a
// TargetJumpTable. If it is an externally defined symbol, a symbol
// with common linkage, a function address, or a jump table address,
// or if we are generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
assert((GVar || isa<Function>(RealGValue)) &&
"Unexpected global value subclass!");
// An external variable is one without an initializer. For these,
// for variables with common linkage, and for Functions, generate
// the LDtocL form.
if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
RealGValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
assert(isa<ConstantSDNode>(N->getOperand(0)) &&
isa<ConstantSDNode>(N->getOperand(1)) &&
"Invalid operand on VADD_SPLAT!");
int Elt = N->getConstantOperandVal(0);
int EltSize = N->getConstantOperandVal(1);
unsigned Opc1, Opc2, Opc3;
EVT VT;
if (EltSize == 1) {
Opc1 = PPC::VSPLTISB;
Opc2 = PPC::VADDUBM;
Opc3 = PPC::VSUBUBM;
VT = MVT::v16i8;
} else if (EltSize == 2) {
Opc1 = PPC::VSPLTISH;
Opc2 = PPC::VADDUHM;
Opc3 = PPC::VSUBUHM;
VT = MVT::v8i16;
} else {
assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
Opc1 = PPC::VSPLTISW;
Opc2 = PPC::VADDUWM;
Opc3 = PPC::VSUBUWM;
VT = MVT::v4i32;
}
if ((Elt & 1) == 0) {
// Elt is even, in the range [-32,-18] + [16,30].
//
// Convert: VADD_SPLAT elt, size
// Into: tmp = VSPLTIS[BHW] elt
// VADDU[BHW]M tmp, tmp
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
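// For example, Elt = 20 with size 4 becomes vspltisw tmp, 10 followed by
// vadduwm tmp, tmp, splatting 10 + 10 = 20 into each word element.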
SDValue EltVal = getI32Imm(Elt >> 1);
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
SDValue TmpVal = SDValue(Tmp, 0);
return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
} else if (Elt > 0) {
// Elt is odd and positive, in the range [17,31].
//
// Convert: VADD_SPLAT elt, size
// Into: tmp1 = VSPLTIS[BHW] elt-16
// tmp2 = VSPLTIS[BHW] -16
// VSUBU[BHW]M tmp1, tmp2
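// For example, Elt = 17: tmp1 splats 1, tmp2 splats -16, and the subtract
// yields 1 - (-16) = 17 in every element.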
SDValue EltVal = getI32Imm(Elt - 16);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
EltVal = getI32Imm(-16);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0));
} else {
// Elt is odd and negative, in the range [-31,-17].
//
// Convert: VADD_SPLAT elt, size
// Into: tmp1 = VSPLTIS[BHW] elt+16
// tmp2 = VSPLTIS[BHW] -16
// VADDU[BHW]M tmp1, tmp2
SDValue EltVal = getI32Imm(Elt + 16);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
EltVal = getI32Imm(-16);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0));
}
}
}
return SelectCode(N);
}
/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
// These optimizations are currently supported only for 64-bit SVR4.
if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
return;
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = --Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
unsigned FirstOp;
unsigned StorageOpcode = N->getMachineOpcode();
switch (StorageOpcode) {
default: continue;
case PPC::LBZ:
case PPC::LBZ8:
case PPC::LD:
case PPC::LFD:
case PPC::LFS:
case PPC::LHA:
case PPC::LHA8:
case PPC::LHZ:
case PPC::LHZ8:
case PPC::LWA:
case PPC::LWZ:
case PPC::LWZ8:
FirstOp = 0;
break;
case PPC::STB:
case PPC::STB8:
case PPC::STD:
case PPC::STFD:
case PPC::STFS:
case PPC::STH:
case PPC::STH8:
case PPC::STW:
case PPC::STW8:
FirstOp = 1;
break;
}
// If this is a load or store with a zero offset, we may be able to
// fold an add-immediate into the memory operation.
if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
N->getConstantOperandVal(FirstOp) != 0)
continue;
SDValue Base = N->getOperand(FirstOp + 1);
if (!Base.isMachineOpcode())
continue;
unsigned Flags = 0;
bool ReplaceFlags = true;
// When the feeding operation is an add-immediate of some sort,
// determine whether we need to add relocation information to the
// target flags on the immediate operand when we fold it into the
// load instruction.
//
// For something like ADDItocL, the relocation information is
// inferred from the opcode; when we process it in the AsmPrinter,
// we add the necessary relocation there. A load, though, can receive
// relocation from various flavors of ADDIxxx, so we need to carry
// the relocation information in the target flags.
switch (Base.getMachineOpcode()) {
default: continue;
case PPC::ADDI8:
case PPC::ADDI:
// In some cases (such as TLS) the relocation information
// is already in place on the operand, so copying the operand
// is sufficient.
ReplaceFlags = false;
// For these cases, the immediate may not be divisible by 4, in
// which case the fold is illegal for DS-form instructions. (The
// other cases provide aligned addresses and are always safe.)
if ((StorageOpcode == PPC::LWA ||
StorageOpcode == PPC::LD ||
StorageOpcode == PPC::STD) &&
(!isa<ConstantSDNode>(Base.getOperand(1)) ||
Base.getConstantOperandVal(1) % 4 != 0))
continue;
break;
case PPC::ADDIdtprelL:
Flags = PPCII::MO_DTPREL_LO;
break;
case PPC::ADDItlsldL:
Flags = PPCII::MO_TLSLD_LO;
break;
case PPC::ADDItocL:
Flags = PPCII::MO_TOC_LO;
break;
}
// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If
// needed, update the target flags for the immediate operand to
// reflect the necessary relocation information.
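// For example, (LD 0, (ADDI8 %base, <imm>)) becomes an LD with displacement
// <imm> and base %base, letting the D-form load do the addition itself.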
DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
DEBUG(Base->dump(CurDAG));
DEBUG(dbgs() << "\nN: ");
DEBUG(N->dump(CurDAG));
DEBUG(dbgs() << "\n");
SDValue ImmOpnd = Base.getOperand(1);
// If the relocation information isn't already present on the
// immediate operand, add it now.
if (ReplaceFlags) {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
// We can't perform this optimization for data whose alignment
// is insufficient for the instruction encoding.
if (GV->getAlignment() < 4 &&
(StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
StorageOpcode == PPC::LWA)) {
DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;
}
ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();
ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
CP->getAlignment(),
0, Flags);
}
}
if (FirstOp == 1) // Store
(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
Base.getOperand(0), N->getOperand(3));
else // Load
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
N->getOperand(2));
// The add-immediate may now be dead, in which case remove it.
if (Base.getNode()->use_empty())
CurDAG->RemoveDeadNode(Base.getNode());
}
}
/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
return new PPCDAGToDAGISel(TM);
}
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
false, false);
Registry.registerPass(*PI, true);
}
void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
CALL_ONCE_INITIALIZATION(initializePassOnce);
}
Index: head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 265925)
@@ -1,7938 +1,7937 @@
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
if (TM.getSubtargetImpl()->isSVR4ABI())
return new PPC64LinuxTargetObjectFile();
return new TargetLoweringObjectFileELF();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget->isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc loads and stores.
setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
// This is used in the ppcf128->int sequence. Note it has different semantics
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget->hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget->hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget->hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
if (Subtarget->hasPOPCNTD()) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
}
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// PowerPC does not have [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, etc. As a result, no
// other SjLj exception interfaces are implemented; please don't build
// your own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
// TRAP is legal.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
if (Subtarget->isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i8, Promote);
AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i16, Promote);
AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i32, Promote);
AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
} else {
// VAARG is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::i64, Custom);
}
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
if (Subtarget->isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
// Use the default implementation.
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
// Comparisons that require checking two conditions.
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
if (Subtarget->has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// This is just the low 32 bits of a (signed) fp->i64 conversion.
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
// With the instructions enabled under FPCVT, we can do everything.
if (PPCSubTarget.hasFPCVT()) {
if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
} else {
// 32-bit PowerPC wants to expand i64 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
if (Subtarget->hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
// We promote all non-typed operations to v4i32.
setOperationAction(ISD::AND , VT, Promote);
AddPromotedToType (ISD::AND , VT, MVT::v4i32);
setOperationAction(ISD::OR , VT, Promote);
AddPromotedToType (ISD::OR , VT, MVT::v4i32);
setOperationAction(ISD::XOR , VT, Promote);
AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
setOperationAction(ISD::LOAD , VT, Promote);
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
// No other operations are legal.
setOperationAction(ISD::MUL , VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
setTruncStoreAction(VT, InnerVT, Expand);
}
setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
setExceptionSelectorRegister(PPC::X4);
} else {
setStackPointerRegisterToSaveRestore(PPC::R1);
setExceptionPointerRegister(PPC::R3);
setExceptionSelectorRegister(PPC::R4);
}
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::FSQRT);
}
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
setMinFunctionAlignment(2);
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
if (isPPC64 && Subtarget->isJITCodeModel())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
setSupportJumpTables(false);
setInsertFencesForAtomic(true);
if (Subtarget->enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
}
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
unsigned MaxMaxAlign) {
if (MaxAlign == MaxMaxAlign)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
MaxAlign = 32;
else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == MaxMaxAlign)
break;
}
}
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on a 4-byte boundary.
if (PPCSubTarget.isDarwin())
return 4;
// 16-byte and wider vectors are passed on a 16-byte boundary.
// Everything else is passed on an 8-byte boundary on PPC64 and on a 4-byte
// boundary on PPC32.
unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
return Align;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
case PPCISD::LOAD: return "PPCISD::LOAD";
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LARX: return "PPCISD::LARX";
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
}
}
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
return CFP->getValueAPF().isZero();
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
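// (Operand 1 of a load node is its base pointer.)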
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isZero();
}
return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
if (!isUnary) {
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else {
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
return false;
}
return true;
}
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else {
for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
return false;
}
return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
assert(N->getValueType(0) == MVT::v16i8 &&
"PPC only supports shuffles by bytes!");
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
LHSStart+j+i*UnitSize) ||
!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
RHSStart+j+i*UnitSize))
return false;
}
return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 8, 24);
return isVMerge(N, UnitSize, 8, 8);
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 0, 16);
return isVMerge(N, UnitSize, 0, 0);
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
assert(N->getValueType(0) == MVT::v16i8 &&
"PPC only supports shuffles by bytes!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 16) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
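// For example, the mask <3,4,5,...,18> yields a vsldoi shift of 3 bytes.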
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
if (!isUnary) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
} else {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
}
return ShiftAmt;
}
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
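// For example, with EltSize == 4 the mask <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7>
// is a splat of word element 1.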
unsigned ElementBase = N->getMaskElt(0);
// FIXME: Handle UNDEF elements too!
if (ElementBase >= 16)
return false;
// Check that the indices are consecutive, in the case of a multi-byte element
// splatted with a v16i8 mask.
for (unsigned i = 1; i != EltSize; ++i)
if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
return false;
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
if (N->getMaskElt(i) < 0) continue;
for (unsigned j = 0; j != EltSize; ++j)
if (N->getMaskElt(i+j) != N->getMaskElt(j))
return false;
}
return true;
}
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
APInt APVal, APUndef;
unsigned BitSize;
bool HasAnyUndefs;
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
return false;
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
SDValue OpVal(0, 0);
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
// multiple elements of the buildvector are folded together into a single
// logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
unsigned EltSize = 16/N->getNumOperands();
if (EltSize < ByteSize) {
unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
SDValue UniquedVals[4];
assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
// See if the elements in the buildvector agree across each chunk position.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
if (UniquedVals[i&(Multiple-1)].getNode() == 0)
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
}
// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
// either constant or undef values that are identical for each chunk. See
// if these chunks can form into a larger vspltis*.
// Check to see if all of the leading entries are either 0 or -1. If
// neither, then this won't fit into the immediate field.
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
}
// Finally, check the least significant entry.
if (LeadingZero) {
if (UniquedVals[Multiple-1].getNode() == 0)
return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
if (Val < 16)
return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
}
if (LeadingOnes) {
if (UniquedVals[Multiple-1].getNode() == 0)
return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
return DAG.getTargetConstant(Val, MVT::i32);
}
return SDValue();
}
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
if (OpVal.getNode() == 0)
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
Value = CN->getZExtValue();
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
Value = FloatToBits(CN->getValueAPF().convertToFloat());
}
// If the splat value is larger than the element value, then we can never do
// this splat. The only case where the replicated bits could fit into our
// immediate field is zero, and we prefer to use vxor for that.
if (ValSizeInBytes < ByteSize) return SDValue();
// If the element value is larger than the splat value, cut it in half and
// check to see if the two halves are equal. Continue doing this until we
// get to ByteSize. This allows us to handle 0x01010101 as 0x01.
while (ValSizeInBytes > ByteSize) {
ValSizeInBytes >>= 1;
// If the top half equals the bottom half, we're still ok.
if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
(Value & ((1 << (8*ValSizeInBytes))-1)))
return SDValue();
}
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5-bit sext field, return it.
if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, MVT::i32);
return SDValue();
}
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
/// isIntS16Immediate - This method tests to see if the node is either a
/// 32-bit or 64-bit immediate, and if the value can be accurately
/// represented as a sign extension from a 16-bit value. If so, it returns
/// true and stores the immediate in Imm.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
if (N->getOpcode() != ISD::Constant)
return false;
Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
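// The (short) cast keeps only the low 16 bits (sign-extended); the node is a
// valid S16 immediate only if widening Imm back reproduces the full constant.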
if (N->getValueType(0) == MVT::i32)
return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
else
return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
short imm = 0;
if (N.getOpcode() == ISD::ADD) {
if (isIntS16Immediate(N.getOperand(1), imm))
return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
if (isIntS16Immediate(N.getOperand(1), imm))
return false; // r+i: prefer folding into [r+imm].
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
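// (If no bit position is set in both operands, then LHS|RHS == LHS+RHS.)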
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
DAG.ComputeMaskedBits(N.getOperand(0),
LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
DAG.ComputeMaskedBits(N.getOperand(1),
RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnownZero | RHSKnownZero) == 0) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
}
}
return false;
}
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
// FIXME: This does not handle the LWA case.
if (VT != MVT::i64)
return;
// NOTE: We'll exclude negative FIs here, which come from argument
// lowering, because there are no known test cases triggering this problem
// using packed structures (or similar). We can remove this exclusion if
// we find such a test case. The reason why this is so test-case driven is
// because this entire 'fixup' is only to prevent crashes (from the
// register scavenger) on not-really-valid inputs. For example, if we have:
// %a = alloca i1
// %b = bitcast i1* %a to i64*
// store i64 0, i64* %b
// then the store should really be marked as 'align 1', but is not. If it
// were marked as 'align 1' then the indexed form would have been
// instruction-selected initially, and the problem this 'fixup' is preventing
// won't happen regardless.
if (FrameIdx < 0)
return;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Align = MFI->getObjectAlignment(FrameIdx);
if (Align >= 4)
return;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
bool Aligned) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG))
return false;
if (N.getOpcode() == ISD::ADD) {
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
return true; // [&g+r]
}
} else if (N.getOpcode() == ISD::OR) {
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
Base = N.getOperand(0);
Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
}
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
// Loading from a constant address.
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
(!Aligned || (CN->getZExtValue() & 3) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
return true;
}
}
Disp = DAG.getTargetConstant(0, getPointerTy());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else
Base = N;
return true; // [r+0]
}
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
// Check to see if we can easily represent this as an [r+r] address. This
// will fail if it thinks that the address is more profitably represented as
// reg+imm, e.g. where imm = 0.
if (SelectAddressRegReg(N, Base, Index, DAG))
return true;
// If the operand is an addition, always emit this as [r+r], since this is
// better (for code size, and execution, as the memop does the add for free)
// than emitting an explicit add.
if (N.getOpcode() == ISD::ADD) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
// Otherwise, do it the hard way, using R0 as the base register.
Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
}
/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
/// offset pointer and addressing mode by reference, if the node's address
/// can be legally represented as a pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
bool isLoad = true;
SDValue Ptr;
EVT VT;
unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Alignment = ST->getAlignment();
isLoad = false;
} else
return false;
// PowerPC doesn't have preinc load/store instructions for vectors.
if (VT.isVector())
return false;
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
// Common code will reject creating a pre-inc form if the base pointer
// is a frame index, or if N is a store and the base pointer is either
// the same as or a predecessor of the value being stored. Check for
// those situations here, and try with swapped Base/Offset instead.
bool Swap = false;
if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
Swap = true;
else if (!isLoad) {
SDValue Val = cast<StoreSDNode>(N)->getValue();
if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
Swap = true;
}
if (Swap)
std::swap(Base, Offset);
AM = ISD::PRE_INC;
return true;
}
// The DS-form LDU/STDU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
return false;
}
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
// sext i32 to i64 when addr mode is r+i.
if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
LD->getExtensionType() == ISD::SEXTLOAD &&
isa<ConstantSDNode>(Offset))
return false;
}
AM = ISD::PRE_INC;
return true;
}
//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
unsigned &LoOpFlags, const GlobalValue *GV = 0) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
// Don't use the PIC base if we are not in the PIC relocation model, or if we
// are on a non-Darwin platform. We don't support PIC on other platforms yet.
bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
TM.getSubtarget<PPCSubtarget>().isDarwin();
if (isPIC) {
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
}
// If this is a reference to a global value that requires a non-lazy-ptr, make
// sure that instruction lowering adds it.
if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
HiOpFlags |= PPCII::MO_NLP_FLAG;
LoOpFlags |= PPCII::MO_NLP_FLAG;
if (GV->hasHiddenVisibility()) {
HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
}
}
return isPIC;
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
SelectionDAG &DAG) {
EVT PtrVT = HiPart.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
SDLoc DL(HiPart);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
// With PIC, the first instruction is actually "GR+hi(&G)".
if (isPIC)
Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
// Generate non-pic code that has direct accesses to the constant pool.
// The address of the global is just (hi(&g)+lo(&g)).
return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue CPIHi =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
SDValue CPILo =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME: TLS addresses currently use medium model code sequences,
// which is the most useful form. Eventually support for small and
// large models could be added if users need it, at the cost of
// additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_LO);
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
if (!is64bit)
llvm_unreachable("only local-exec is currently supported for ppc32");
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, TPOffsetHi);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
GOTEntryHi, TGA);
// We need a chain node, and don't have one handy. The underlying
// call has no side effects, so using the function entry node
// suffices.
SDValue Chain = DAG.getEntryNode();
Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
PtrVT, ParmReg, TGA);
// The return value from GET_TLS_ADDR really is in X3 already, but
// some hacks are needed here to tie everything together. The extra
// copies dissolve during subsequent transforms.
Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
}
if (Model == TLSModel::LocalDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
GOTReg, TGA);
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
GOTEntryHi, TGA);
// We need a chain node, and don't have one handy. The underlying
// call has no side effects, so using the function entry node
// suffices.
SDValue Chain = DAG.getEntryNode();
Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
PtrVT, ParmReg, TGA);
// The return value from GET_TLSLD_ADDR really is in X3 already, but
// some hacks are needed here to tie everything together. The extra
// copies dissolve during subsequent transforms.
Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
Chain, ParmReg, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
llvm_unreachable("Unknown TLS model!");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
SDLoc DL(GSDN);
const GlobalValue *GV = GSDN->getGlobal();
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
SDValue GAHi =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
SDValue GALo =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
// If the global reference is actually to a non-lazy-pointer, we have to do an
// extra load to get the address of the global.
if (MOHiFlag & PPCII::MO_NLP_FLAG)
Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
false, false, false, 0);
return Ptr;
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
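// That is, (seteq X, 0) becomes (ctlz X) >> log2(bitwidth), which is 1 only
// when every bit of X is zero.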
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isNullValue() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
VT = MVT::i32;
Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
}
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
DAG.getConstant(Log2b, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.
if (C->isAllOnesValue() || C->isNullValue())
return SDValue();
}
// If we have an integer seteq/setne, turn it into a compare against zero
// by xor'ing the rhs with the lhs, which is faster than setting a
// condition register, reading it back out, and masking the correct bit. The
// normal approach here uses sub to do this instead of xor. Using xor exposes
// the result to other bit-twiddling opportunities.
EVT LHSVT = Op.getOperand(0).getValueType();
if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
EVT VT = Op.getValueType();
SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
Op.getOperand(1));
return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
}
return SDValue();
}
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
SDLoc dl(Node);
assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
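// The layout of the va_list struct being walked here is spelled out in
// LowerVASTART below.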
// gpr_index
SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
VAListPtr, MachinePointerInfo(SV), MVT::i8,
false, false, 0);
InChain = GprIndex.getValue(1);
if (VT == MVT::i64) {
// Check if GprIndex is even
SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
DAG.getConstant(1, MVT::i32));
SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
DAG.getConstant(0, MVT::i32), ISD::SETNE);
SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
DAG.getConstant(1, MVT::i32));
// Align GprIndex to be even if it isn't
GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
GprIndex);
}
// fpr index is 1 byte after gpr
SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
DAG.getConstant(1, MVT::i32));
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
FprPtr, MachinePointerInfo(SV), MVT::i8,
false, false, 0);
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
DAG.getConstant(8, MVT::i32));
SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
DAG.getConstant(4, MVT::i32));
// areas
SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
MachinePointerInfo(), false, false,
false, 0);
InChain = OverflowArea.getValue(1);
SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
MachinePointerInfo(), false, false,
false, 0);
InChain = RegSaveArea.getValue(1);
// select overflow_area if index >= 8
SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
DAG.getConstant(8, MVT::i32), ISD::SETLT);
// adjustment constant gpr_index * 4/8
SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
DAG.getConstant(VT.isInteger() ? 4 : 8,
MVT::i32));
// OurReg = RegSaveArea + RegConstant
SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
RegConstant);
// Floating types are 32 bytes into RegSaveArea
if (VT.isFloatingPoint())
OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
DAG.getConstant(32, MVT::i32));
// increase {f,g}pr_index by 1 (or 2 if VT is i64)
SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
DAG.getConstant(VT == MVT::i64 ? 2 : 1,
MVT::i32));
InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
VT.isInteger() ? VAListPtr : FprPtr,
MachinePointerInfo(SV),
MVT::i8, false, false, 0);
// determine if we should load from reg_save_area or overflow_area
SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
// increase overflow_area by 4/8 if gpr/fpr index >= 8
SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
DAG.getConstant(VT.isInteger() ? 4 : 8,
MVT::i32));
OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
OverflowAreaPlusN);
InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
OverflowAreaPtr,
MachinePointerInfo(),
MVT::i32, false, false, 0);
return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
false, false, false, 0);
}
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
// We have to copy the entire va_list struct:
// 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes.
return DAG.getMemcpy(Op.getOperand(0), Op,
Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(12, MVT::i32), 8, false, true,
MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
Type *IntPtrTy =
DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = IntPtrTy;
Entry.Node = Trmp; Args.push_back(Entry);
// TrampSize == (isPPC64 ? 48 : 40);
Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
isPPC64 ? MVT::i64 : MVT::i32);
Args.push_back(Entry);
Entry.Node = FPtr; Args.push_back(Entry);
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(Chain,
Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0,
CallingConv::C,
/*isTailCall=*/false,
/*doesNotRet=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
SDLoc dl(Op);
if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
MachinePointerInfo(SV),
false, false, 0);
}
// For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
// We assume the given va_list is already allocated.
//
// typedef struct {
// char gpr; /* index into the array of 8 GPRs
// * stored in the register save area
// * gpr=0 corresponds to r3,
// * gpr=1 to r4, etc.
// */
// char fpr; /* index into the array of 8 FPRs
// * stored in the register save area
// * fpr=0 corresponds to f1,
// * fpr=1 to f2, etc.
// */
// char *overflow_arg_area;
// /* location on stack that holds
// * the next overflow argument
// */
// char *reg_save_area;
// /* where r3:r10 and f1:f8 (if saved)
// * are stored
// */
// } va_list[1];
SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
PtrVT);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
PtrVT);
uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
uint64_t FPROffset = 1;
SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
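// With 32-bit pointers these constants are FPROffset == 1, StackOffset == 3
// and FrameOffset == 4, stepping from the gpr/fpr count bytes to the
// overflow_arg_area and reg_save_area pointer fields of the va_list.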
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
// Store first byte : number of int regs
SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
Op.getOperand(1),
MachinePointerInfo(SV),
MVT::i8, false, false, 0);
uint64_t nextOffset = FPROffset;
SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
ConstFPROffset);
// Store second byte : number of float regs
SDValue secondStore =
DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
MachinePointerInfo(SV, nextOffset), MVT::i8,
false, false, 0);
nextOffset += StackOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
// Store second word : arguments given on stack
SDValue thirdStore =
DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
MachinePointerInfo(SV, nextOffset),
false, false, 0);
nextOffset += FrameOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
// Store third word : arguments given in registers
return DAG.getStore(thirdStore, dl, FR, nextPtr,
MachinePointerInfo(SV, nextOffset),
false, false, 0);
}
#include "PPCGenCallingConv.inc"
// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
}
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
return true;
}
bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumArgRegs = array_lengthof(ArgRegs);
unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
// Skip one register if the first unallocated register has an even register
// number and there are still argument registers available which have not been
// allocated yet. RegNum is actually an index into ArgRegs, which means we
// need to skip a register if RegNum is odd.
if (RegNum != NumArgRegs && RegNum % 2 == 1) {
State.AllocateReg(ArgRegs[RegNum]);
}
// Always return false here, as this function only makes sure that the first
// unallocated register has an odd register number and does not actually
// allocate a register for the current argument.
return false;
}
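// Illustrative example of the skip above (prototype invented for this sketch):
//
//   void f(int a, long long b);  // 32-bit SVR4
//
// 'a' lands in R3, leaving RegNum == 1 (R4, an odd index). The AllocateReg
// call above then burns R4, so the two i32 halves of 'b' are passed in the
// aligned pair R5:R6 rather than straddling R4:R5.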
bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
const unsigned NumArgRegs = array_lengthof(ArgRegs);
unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
// If there is only one Floating-point register left we need to put both f64
// values of a split ppc_fp128 value on the stack.
if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
State.AllocateReg(ArgRegs[RegNum]);
}
// Always return false here, as this function only makes sure that the two f64
// values a ppc_fp128 value is split into are both passed in registers or both
// passed on the stack and does not actually allocate a register for the
// current argument.
return false;
}
/// GetFPR - Get the set of FP registers that should be allocated for
/// arguments on Darwin.
static const uint16_t *GetFPR() {
static const uint16_t FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
return FPR;
}
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
unsigned ArgSize = ArgVT.getSizeInBits()/8;
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
return ArgSize;
}
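// A minimal worked example of the rounding above (plain arithmetic, values
// invented): a 13-byte byval argument with 8-byte pointers occupies
//
//   unsigned ArgSize = 13, PtrByteSize = 8;
//   ArgSize = ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
//   // ArgSize == 16: stack slots are padded to whole pointer-sized words.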
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
if (PPCSubTarget.isSVR4ABI()) {
if (PPCSubTarget.isPPC64())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
else
return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
} else {
return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
}
}
SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// 32-bit SVR4 ABI Stack Frame Layout:
// +-----------------------------------+
// +--> | Back chain |
// | +-----------------------------------+
// | | Floating-point register save area |
// | +-----------------------------------+
// | | General register save area |
// | +-----------------------------------+
// | | CR save word |
// | +-----------------------------------+
// | | VRSAVE save word |
// | +-----------------------------------+
// | | Alignment padding |
// | +-----------------------------------+
// | | Vector register save area |
// | +-----------------------------------+
// | | Local variable space |
// | +-----------------------------------+
// | | Parameter list area |
// | +-----------------------------------+
// | | LR save word |
// | +-----------------------------------+
// SP--> +--- | Back chain |
// +-----------------------------------+
//
// Specifications:
// System V Application Binary Interface PowerPC Processor Supplement
// AltiVec Technology Programming Interface Manual
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Arguments stored in registers.
if (VA.isRegLoc()) {
const TargetRegisterClass *RC;
EVT ValVT = VA.getValVT();
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
case MVT::i32:
RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
}
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo(),
false, false, false, 0));
}
}
// Assign locations to all of the incoming aggregate by value arguments.
// Aggregates passed by value are stored in the local variable space of the
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
MinReservedArea =
std::max(MinReservedArea,
PPCFrameLowering::getMinCallFrameSize(false, false));
unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
getStackAlignment();
unsigned AlignMask = TargetAlign-1;
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
FI->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
static const uint16_t GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
static const uint16_t FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
NumGPArgRegs));
FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
NumFPArgRegs));
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
CCInfo.getNextStackOffset(), true));
FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are stored to the
// VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
// the result of va_next.
for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
// Get an existing live-in vreg, or add a new one.
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
if (!VReg)
VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
// FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
// is set.
// The double arguments are stored to the VarArgsFrameIndex
// on the stack.
for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
// Get an existing live-in vreg, or add a new one.
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
if (!VReg)
VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl,
MVT::Other, &MemOps[0], MemOps.size());
return Chain;
}
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
SelectionDAG &DAG, SDValue ArgVal,
SDLoc dl) const {
if (Flags.isSExt())
ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
else if (Flags.isZExt())
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}
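// Hedged reading of the nodes above for a 'signext i32' argument arriving in
// an i64 GPR (the SelectionDAG shorthand below is illustrative):
//
//   ArgVal : i64                      // low 32 bits hold the argument
//   ArgVal = AssertSext ArgVal, i32   // promise: already sign-extended
//   ArgVal = TRUNCATE i32, ArgVal     // narrow to the declared i32 type
//
// No runtime extension code results; AssertSext/AssertZext merely record a
// fact the calling convention already guarantees.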
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
void
PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
unsigned nAltivecParamsAtEnd,
unsigned MinReservedArea,
bool isPPC64) const {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Add the Altivec parameters at the end, if needed.
if (nAltivecParamsAtEnd) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += 16*nAltivecParamsAtEnd;
}
MinReservedArea =
std::max(MinReservedArea,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
unsigned TargetAlign
= DAG.getMachineFunction().getTarget().getFrameLowering()->
getStackAlignment();
unsigned AlignMask = TargetAlign-1;
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
FI->setMinReservedArea(MinReservedArea);
}
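// The mask trick above rounds up to the stack alignment; a self-contained
// example with an assumed 16-byte TargetAlign:
//
//   unsigned TargetAlign = 16, AlignMask = TargetAlign - 1;        // 0xF
//   unsigned MinReservedArea = 52;
//   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;  // == 64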
SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
static const uint16_t GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const uint16_t *FPR = GetFPR();
static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
if (isVarArg) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT,
Flags,
PtrByteSize);
} else
nAltivecParamsAtEnd++;
} else
// Calculate min reserved area.
MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
Flags,
PtrByteSize);
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
// ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of
// registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
// Empty aggregate parameters do not take up registers. Examples:
// struct { } a;
// union { } b;
// int c[0];
// etc. However, we have to provide a place-holder in InVals, so
// pretend we have an 8-byte item at the current address for that
// purpose.
if (!ObjSize) {
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
continue;
}
unsigned BVAlign = Flags.getByValAlign();
if (BVAlign > 8) {
ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
CurArgOffset = ArgOffset;
}
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize < 8) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store;
if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
// slot. The address of the parameter was already calculated
// above (InVals.push_back(FIN)) to be the right-justified
// offset within the slot. For this store, we need a new
// frame index that points at the beginning of the slot.
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg),
false, false, 0);
}
MemOps.push_back(Store);
++GPR_idx;
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
ArgOffset += PtrByteSize;
continue;
}
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
// Store whatever pieces of the object are in registers
// to memory. ArgOffset will be the address of the beginning
// of the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg;
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
ArgOffset += PtrByteSize;
} else {
ArgOffset += ArgSize - j;
break;
}
}
continue;
}
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
case MVT::i64:
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
}
ArgOffset += 8;
break;
case MVT::f32:
case MVT::f64:
// Every 8 bytes of argument space consumes one of the GPRs available for
// argument passing.
if (GPR_idx != Num_GPR_Regs) {
++GPR_idx;
}
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
}
ArgOffset += 8;
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
ArgOffset += PtrByteSize;
if (GPR_idx != Num_GPR_Regs)
GPR_idx++;
}
ArgOffset += 16;
GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
}
++VR_idx;
} else {
// Vectors are aligned.
ArgOffset = ((ArgOffset+15)/16)*16;
CurArgOffset = ArgOffset;
ArgOffset += 16;
needsLoad = true;
}
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
CurArgOffset + (ArgSize - ObjSize),
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
}
InVals.push_back(ArgVal);
}
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(PtrByteSize, Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing the
// result of va_next.
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl,
MVT::Other, &MemOps[0], MemOps.size());
return Chain;
}
SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const uint16_t *FPR = GetFPR();
static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof( VR);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
// too many vectors to fit in registers, something that only occurs in
// constructed examples :), but we have to walk the arglist to figure
// that out... for the pathological case, compute VecArgOffset as the
// start of the vector parameter area. Computing VecArgOffset is the
// entire point of the following loop.
unsigned VecArgOffset = ArgOffset;
if (!isVarArg && !isPPC64) {
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
if (Flags.isByVal()) {
// ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of
// regs.
unsigned ObjSize = Flags.getByValSize();
unsigned ArgSize =
((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
VecArgOffset += ArgSize;
continue;
}
switch(ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
case MVT::f32:
VecArgOffset += 4;
break;
case MVT::i64: // PPC64
case MVT::f64:
// FIXME: We are guaranteed to be !isPPC64 at this point.
// Does MVT::i64 apply?
VecArgOffset += 8;
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
// Nothing to do, we're only looking at non-vector args here.
break;
}
}
}
// We've found where the vector parameter area in memory is. Skip the
// first 12 parameters; these don't use that memory.
VecArgOffset = ((VecArgOffset+15)/16)*16;
VecArgOffset += 12*16;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
if (isVarArg || isPPC64) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT,
Flags,
PtrByteSize);
} else nAltivecParamsAtEnd++;
} else
// Calculate min reserved area.
MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
Flags,
PtrByteSize);
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
// ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of
// registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
// Objects of size 1 and 2 are right justified, everything else is
// left justified. This means the memory address is adjusted forwards.
if (ObjSize==1 || ObjSize==2) {
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg;
if (isPPC64)
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg,
- CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
ArgOffset += PtrByteSize;
continue;
}
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
// Store whatever pieces of the object are in registers
// to memory. ArgOffset will be the address of the beginning
// of the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg;
if (isPPC64)
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
ArgOffset += PtrByteSize;
} else {
ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
break;
}
}
continue;
}
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
}
// All int arguments reserve stack space in the Darwin ABI.
ArgOffset += PtrByteSize;
break;
}
// FALLTHROUGH
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
}
// All int arguments reserve stack space in the Darwin ABI.
ArgOffset += 8;
break;
case MVT::f32:
case MVT::f64:
// Every 4 bytes of argument space consumes one of the GPRs available for
// argument passing.
if (GPR_idx != Num_GPR_Regs) {
++GPR_idx;
if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
++GPR_idx;
}
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
} else {
needsLoad = true;
}
// All FP arguments reserve stack space in the Darwin ABI.
ArgOffset += isPPC64 ? 8 : ObjSize;
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
ArgOffset += PtrByteSize;
if (GPR_idx != Num_GPR_Regs)
GPR_idx++;
}
ArgOffset += 16;
GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
}
++VR_idx;
} else {
if (!isVarArg && !isPPC64) {
// Vectors go after all the nonvectors.
CurArgOffset = VecArgOffset;
VecArgOffset += 16;
} else {
// Vectors are aligned.
ArgOffset = ((ArgOffset+15)/16)*16;
CurArgOffset = ArgOffset;
ArgOffset += 16;
}
needsLoad = true;
}
break;
}
// We need to load the argument to a virtual register if we determined above
// that we ran out of physical registers of the appropriate type.
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
CurArgOffset + (ArgSize - ObjSize),
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
}
InVals.push_back(ArgVal);
}
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing the
// result of va_next.
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
unsigned VReg;
if (isPPC64)
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl,
MVT::Other, &MemOps[0], MemOps.size());
return Chain;
}
/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
bool isPPC64,
bool isVarArg,
unsigned CC,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
const SmallVectorImpl<SDValue> &OutVals,
unsigned &nAltivecParamsAtEnd) {
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned NumOps = Outs.size();
unsigned PtrByteSize = isPPC64 ? 8 : 4;
// Add up all the space actually used.
// In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
// they all go in registers, but we must reserve stack space for them for
// possible use by the caller. In varargs or 64-bit calls, parameters are
// assigned stack space in order, with padding so Altivec parameters are
// 16-byte aligned.
nAltivecParamsAtEnd = 0;
for (unsigned i = 0; i != NumOps; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
if (!isVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
nAltivecParamsAtEnd++;
continue;
}
// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
NumBytes = ((NumBytes+15)/16)*16;
}
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
}
// Allow for Altivec parameters at the end, if needed.
if (nAltivecParamsAtEnd) {
NumBytes = ((NumBytes+15)/16)*16;
NumBytes += 16*nAltivecParamsAtEnd;
}
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if it is varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
NumBytes = std::max(NumBytes,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
unsigned TargetAlign = DAG.getMachineFunction().getTarget().
getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
return NumBytes;
}
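// For orientation, the 24/48-byte starting point mentioned at the top of this
// function follows from the six prereserved slots [SP][CR][LR][3 x unused]
// (a sketch of the arithmetic, not extra ABI knowledge):
//
//   unsigned Slots = 6;
//   unsigned Linkage32 = Slots * 4;   // 24 bytes on 32-bit targets
//   unsigned Linkage64 = Slots * 8;   // 48 bytes on 64-bit targets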
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
unsigned ParamSize) {
if (!isTailCall) return 0;
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
unsigned CallerMinReservedArea = FI->getMinReservedArea();
int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
// Remember only if the new adjustment is bigger.
if (SPDiff < FI->getTailCallSPDelta())
FI->setTailCallSPDelta(SPDiff);
return SPDiff;
}
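// Worked example for the arithmetic above (sizes invented): a caller whose
// MinReservedArea is 112 bytes tail-calling a function whose arguments need
// only 64 bytes gives
//
//   int SPDiff = 112 - 64;   // == 48: the callee fits, no growth needed
//
// Only an SPDiff below the currently stored delta is remembered, so across
// several tail calls the most stack-hungry one (most negative SPDiff) wins.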
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
if (isVarArg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
// Functions containing by val parameters are not supported.
for (unsigned i = 0; i != Ins.size(); i++) {
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Flags.isByVal()) return false;
}
// Non PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return true;
// At the moment we can only do local tail calls (in same module, hidden
// or protected) if we are generating PIC.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->hasHiddenVisibility()
|| G->getGlobal()->hasProtectedVisibility();
}
return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C) return 0;
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
SignExtend32<26>(Addr) != Addr)
return 0; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}
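// Example of an address this accepts (a sketch, not a test from the tree):
// 0x01000000 is 4-byte aligned and its upper bits are a sign-extension of the
// 26-bit field, so the returned constant is 0x01000000 >> 2 == 0x00400000,
// the LI field value for an absolute branch-and-link:
//
//   bla 0x01000000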
namespace {
struct TailCallArgumentInfo {
SDValue Arg;
SDValue FrameIdxOp;
int FrameIdx;
TailCallArgumentInfo() : FrameIdx(0) {}
};
}
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue Chain,
const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
SmallVectorImpl<SDValue> &MemOpChains,
SDLoc dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
SDValue FIN = TailCallArgs[i].FrameIdxOp;
int FI = TailCallArgs[i].FrameIdx;
// Store relative to framepointer.
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
MachineFunction &MF,
SDValue Chain,
SDValue OldRetAddr,
SDValue OldFP,
int SPDiff,
bool isPPC64,
bool isDarwinABI,
SDLoc dl) {
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(NewRetAddr),
false, false, 0);
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
int NewFPLoc =
SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
MachinePointerInfo::getFixedStack(NewFPIdx),
false, false, 0);
}
}
return Chain;
}
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
Info.Arg = Arg;
Info.FrameIdxOp = FIN;
Info.FrameIdx = FI;
TailCallArguments.push_back(Info);
}
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
/// return address stack slots. Returns the chain as result and the loaded
/// values in LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
int SPDiff,
SDValue Chain,
SDValue &LROpOut,
SDValue &FPOpOut,
bool isDarwinABI,
SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, false, 0);
Chain = SDValue(LROpOut.getNode(), 1);
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
false, false, false, 0);
Chain = SDValue(FPOpOut.getNode(), 1);
}
}
return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to address "Dst". The size and alignment of the copy
/// are taken from the byval parameter attribute (Flags). The copy will be
/// passed as a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
false, false, MachinePointerInfo(0),
MachinePointerInfo(0));
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
SDLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
if (isVector) {
SDValue StackPtr;
if (isPPC64)
StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
else
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
}
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0));
// Calculate and remember argument location.
} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
TailCallArguments);
}
static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
SDValue LROp, SDValue FPOp, bool isDarwinABI,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
MachineFunction &MF = DAG.getMachineFunction();
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
// might overwrite each other in case of tail call optimization.
SmallVector<SDValue, 8> MemOpChains2;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
isPPC64, isDarwinABI, dl);
// Emit callseq_end just before tailcall node.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &PPCSubTarget) {
bool isPPC64 = PPCSubTarget.isPPC64();
bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
// If this is an absolute destination address, use the munged value.
Callee = SDValue(Dest, 0);
needIndirectCall = false;
}
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
// Use indirect calls for ALL functions calls in JIT mode, since the
// far-call stubs may be outside relocation limits for a BL instruction.
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
(PPCSubTarget.getTargetTriple().isMacOSX() &&
PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = PPCII::MO_DARWIN_STUB;
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common,
// every direct call is) turn it into a TargetGlobalAddress /
// TargetExternalSymbol node so that legalize doesn't hack it.
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
Callee.getValueType(),
0, OpFlags);
needIndirectCall = false;
}
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
(PPCSubTarget.getTargetTriple().isMacOSX() &&
PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = PPCII::MO_DARWIN_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
OpFlags);
needIndirectCall = false;
}
if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call; we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
if (isSVR4ABI && isPPC64) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
// The function descriptor is a three doubleword structure (sketched in a
// comment after this block) with the following fields: function entry
// point, TOC base address and environment pointer.
// Thus for a call through a function pointer, the following actions need
// to be performed:
// 1. Save the TOC of the caller in the TOC save area of its stack
// frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
// 2. Load the address of the function entry point from the function
// descriptor.
// 3. Load the TOC of the callee from the function descriptor into r2.
// 4. Load the environment pointer from the function descriptor into
// r11.
// 5. Branch to the function entry point address.
// 6. On return of the callee, the TOC of the caller needs to be
// restored (this is done in FinishCall()).
//
// All those operations are flagged together to ensure that no other
// operations can be scheduled in between. E.g. without flagging the
// operations together, a TOC access in the caller could be scheduled
// between the load of the callee TOC and the branch to the callee, which
// results in the TOC access going through the TOC of the callee instead
// of going through the TOC of the caller, which leads to incorrect code.
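// A sketch of the descriptor being described (field names invented for
// illustration; the offsets follow from PtrOff = 16 below and the
// doubleword spacing mentioned above):
//
//   struct FuncDesc64 {
//     uint64_t EntryPoint;   // offset  0: loaded and fed to MTCTR
//     uint64_t TOCBase;      // offset  8: loaded into r2 (LOAD_TOC)
//     uint64_t EnvPointer;   // offset 16: loaded into r11
//   };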
// Load the address of the function entry point from the function
// descriptor.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
InFlag.getNode() ? 3 : 2);
Chain = LoadFuncPtr.getValue(1);
InFlag = LoadFuncPtr.getValue(2);
// Load environment pointer into r11.
// Offset of the environment pointer within the function descriptor.
SDValue PtrOff = DAG.getIntPtrConstant(16);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
InFlag);
Chain = LoadEnvPtr.getValue(1);
InFlag = LoadEnvPtr.getValue(2);
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
InFlag);
Chain = EnvVal.getValue(0);
InFlag = EnvVal.getValue(1);
// Load TOC of the callee into r2. We are using a target-specific load
// with r2 hard coded, because the result of a target-independent load
// would never go directly into r2, since r2 is a reserved register (which
// prevents the register allocator from allocating it), resulting in an
// additional register being allocated and an unnecessary move instruction
// being generated.
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
Callee, InFlag);
Chain = LoadTOCPtr.getValue(0);
InFlag = LoadTOCPtr.getValue(1);
MTCTROps[0] = Chain;
MTCTROps[1] = LoadFuncPtr;
MTCTROps[2] = InFlag;
}
Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
2 + (InFlag.getNode() != 0));
InFlag = Chain.getValue(1);
NodeTys.clear();
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
// Add use of X11 (holding environment pointer)
if (isSVR4ABI && isPPC64)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
}
// If this is a direct call, pass the chain and the callee.
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
return CallOpc;
}
static
bool isLocalCall(const SDValue &Callee)
{
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return !G->getGlobal()->isDeclaration() &&
!G->getGlobal()->isWeakForLinker();
return false;
}
SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Val = DAG.getCopyFromReg(Chain, dl,
VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
}
InVals.push_back(Val);
}
return Chain;
}
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
bool isTailCall, bool isVarArg,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
SDValue InFlag, SDValue Chain,
SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const {
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
// Emit tail call.
if (isTailCall) {
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
}
// Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in different modules and
// thus have different TOCs, the call will be replaced with a call to a stub
// function which saves the current TOC, loads the TOC of the callee and
// branches to the callee. The NOP will be replaced with a load instruction
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
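// Roughly, in assembly (a hedged sketch; the stub name and the exact TOC
// save slot offset are linker/ABI details, not taken from this file):
//
//   bl callee    # may be redirected to a TOC-switching stub at link time
//   nop          # for a cross-TOC call the linker rewrites this into
//                # something like: ld r2, 40(r1)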
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
// We are using a target-specific load with r2 hard coded, because the
// result of a target-independent load would never go directly into r2,
// since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
InFlag = Chain.getValue(1);
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
Ins, dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
if (PPCSubTarget.isSVR4ABI()) {
if (PPCSubTarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
}
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
unsigned PtrByteSize = 4;
MachineFunction &MF = DAG.getMachineFunction();
// Mark this function as potentially containing a function that contains a
// tail call. As a consequence the frame pointer will be used for dynamic
// alloca and for restoring the caller's stack pointer in this function's
// epilog. This is done because the tail-called function might overwrite the
// value in this function's (MF) stack pointer stack slot 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
// area, parameter list area and the part of the local variable space which
// contains copies of aggregates which are passed by value.
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
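// (Per PPCFrameLowering::getLinkageSize(false, false), the 32-bit SVR4
// linkage area is 8 bytes: the back chain word plus the LR save word.)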
if (isVarArg) {
// Handle fixed and variable vector arguments differently.
// Fixed vector arguments go into registers as long as registers are
// available. Variable vector arguments always go into memory.
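// Illustrative example: for a callee declared as f(vector int, ...),
// a call f(v1, v2) routes v1 through CC_PPC32_SVR4 (so it may land in
// a VR), while v2 matches the ellipsis and CC_PPC32_SVR4_VarArg forces
// it to memory.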
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool Result;
if (Outs[i].IsFixed) {
Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
CCInfo);
} else {
Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo);
}
if (Result) {
#ifndef NDEBUG
errs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n";
#endif
llvm_unreachable(0);
}
}
} else {
// All arguments are treated the same.
CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// variable space where copies of aggregates which are passed by value are
// stored.
unsigned NumBytes = CCByValInfo.getNextStackOffset();
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so they can be moved somewhere
// else later.
SDValue LROp, FPOp;
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
dl);
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, j = 0, e = ArgLocs.size();
i != e;
++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isByVal()) {
// Argument is an aggregate which is passed by value, thus we need to
// create a copy of it in the local variable space of the current stack
// frame (which is the stack frame of the caller) and pass the address of
// this copy to the callee.
assert((j < ByValArgLocs.size()) && "Index out of bounds!");
CCValAssign &ByValVA = ByValArgLocs[j++];
assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
// Memory reserved in the local variable space of the caller's stack frame.
unsigned LocMemOffset = ByValVA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
// Create a copy of the argument in the local area of the current
// stack frame.
SDValue MemcpyCall =
CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
CallSeqStart.getNode()->getOperand(1),
SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
// Pass the address of the aggregate copy on the stack either in a
// physical register or in the parameter list area of the current stack
// frame to the callee.
Arg = PtrOff;
}
if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
// Put argument in the parameter list area of the current stack frame.
assert(VA.isMemLoc());
unsigned LocMemOffset = VA.getLocMemOffset();
if (!isTailCall) {
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
false, false, 0));
} else {
// Calculate and remember argument location.
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
TailCallArguments);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// Set CR bit 6 to true if this is a vararg call with floating args passed in
// registers.
if (isVarArg) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, InFlag };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
InFlag = Chain.getValue(1);
}
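// (Background: a vararg callee under the SVR4 ABI tests CR bit 6 in its
// prologue to decide whether the FPR argument registers must be dumped
// to its register save area, so the caller records here whether any
// floating-point arguments went in FPRs.)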
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue
PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue CallSeqStart,
ISD::ArgFlagsTy Flags,
SelectionDAG &DAG,
SDLoc dl) const {
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
CallSeqStart.getNode()->getOperand(1),
SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
}
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
// Mark this function as potentially containing a function that contains a
// tail call. As a consequence the frame pointer will be used for dynamic
// allocation and for restoring the caller's stack pointer in this
// function's epilog. This is done because a tail-called function might
// overwrite the value in this function's (MF) stack pointer stack slot
// 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with at least 48 bytes, which
// is reserved space for [SP][CR][LR][3 x unused].
// NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
// of this call.
unsigned NumBytes =
CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
Outs, OutVals, nAltivecParamsAtEnd);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
if (isTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so they can be moved somewhere
// else later.
SDValue LROp, FPOp;
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
dl);
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
// Figure out which arguments are going to go in registers, and which in
// memory. Also, if this is a vararg function, floating point arguments
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const uint16_t GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const uint16_t *FPR = GetFPR();
static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Promote integers to 64-bit values.
if (Arg.getValueType() == MVT::i32) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
}
// FIXME memcpy is used way more than necessary. Correctness first.
// Note: "by value" is code for passing a structure by value, not
// basic types.
if (Flags.isByVal()) {
// Note: Size includes alignment padding, so
// struct x { short a; char b; }
// will have Size = 4. With #pragma pack(1), it will have Size = 3.
// These are the proper values we need for right-justifying the
// aggregate in a parameter register.
unsigned Size = Flags.getByValSize();
// An empty aggregate parameter takes up no storage and no
// registers.
if (Size == 0)
continue;
unsigned BVAlign = Flags.getByValAlign();
if (BVAlign > 8) {
if (BVAlign % PtrByteSize != 0)
llvm_unreachable(
"ByVal alignment is not a multiple of the pointer size");
ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
}
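// Worked example: with BVAlign == 16 and ArgOffset == 52, the rounding
// above gives ((52+15)/16)*16 == 64, i.e. the next 16-byte boundary.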
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
continue;
}
}
if (GPR_idx == NumGPRs && Size < 8) {
SDValue Const = DAG.getConstant(PtrByteSize - Size,
PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
ArgOffset += PtrByteSize;
continue;
}
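// E.g. for Size == 3 the memcpy above targets PtrOff + 5, so the three
// bytes occupy the tail of the parameter doubleword -- the same
// right-justified image a GPR load would have produced.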
// Copy entire object into memory. There are cases where gcc-generated
// code assumes it is there, even if it could be put entirely into
// registers. (This is not what the doc says.)
// FIXME: The above statement is likely due to a misunderstanding of the
// documents. All arguments must be copied into the parameter area BY
// THE CALLEE in the event that the callee takes the address of any
// formal argument. That has not yet been implemented. However, it is
// reasonable to use the stack area as a staging area for the register
// load.
// Skip this for small aggregates, as we will use the same slot for a
// right-justified copy, below.
if (Size >= 8)
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
CallSeqStart,
Flags, DAG, dl);
// When a register is available, pass a small aggregate right-justified.
if (Size < 8 && GPR_idx != NumGPRs) {
// The easiest way to get this right-justified in a register
// is to copy the structure into the rightmost portion of a
// local variable slot, then load the whole slot into the
// register.
// FIXME: The memcpy seems to produce pretty awful code for
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
// Load the slot into the register.
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
continue;
}
// For aggregates larger than PtrByteSize, copy the pieces of the
// object that fit into registers from the parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
} else {
ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
break;
}
}
continue;
}
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64:
if (FPR_idx != NumFPRs) {
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
if (isVarArg) {
// A single float or an aggregate containing only a single float
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
StoreOff = PtrOff;
SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(), false, false,
false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
} else if (GPR_idx != NumGPRs)
// If we have any FPRs remaining, we may also have GPRs remaining.
++GPR_idx;
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
if (Arg.getValueType() == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
ArgOffset += 8;
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
// V registers; in fact gcc does this only for arguments that are
// prototyped, not for those that match the ellipsis. We do it for all
// arguments; this seems to work.
while (ArgOffset % 16 !=0) {
ArgOffset += PtrByteSize;
if (GPR_idx != NumGPRs)
GPR_idx++;
}
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
if (VR_idx != NumVRs) {
SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
break;
}
// Non-varargs Altivec params generally go in registers, but have
// stack space allocated at the end.
if (VR_idx != NumVRs) {
// Doesn't have GPR space allocated.
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
break;
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
!dyn_cast<ExternalSymbolSDNode>(Callee) &&
!isBLACompatibleAddress(Callee, DAG)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
SDValue PtrOff = DAG.getIntPtrConstant(40);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
// R12 must contain the address of an indirect callee. This does not
// mean the MTCTR instruction must use R12; it's easier to model this
// as an extra parameter, so do that.
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
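// (This store pairs with the TOC restore in FinishCall: for indirect
// calls the PPCISD::TOC_RESTORE node reloads r2 from this slot after
// the bctrl returns.)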
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
unsigned PtrByteSize = isPPC64 ? 8 : 4;
MachineFunction &MF = DAG.getMachineFunction();
// Mark this function as potentially containing a function that contains a
// tail call. As a consequence the frame pointer will be used for dynamic
// allocation and for restoring the caller's stack pointer in this
// function's epilog. This is done because a tail-called function might
// overwrite the value in this function's (MF) stack pointer stack slot
// 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// reserved space for [SP][CR][LR][3 x unused].
unsigned NumBytes =
CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
Outs, OutVals,
nAltivecParamsAtEnd);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
if (isTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so they can be moved somewhere
// else later.
SDValue LROp, FPOp;
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
dl);
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
SDValue StackPtr;
if (isPPC64)
StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
else
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
// Figure out which arguments are going to go in registers, and which in
// memory. Also, if this is a vararg function, floating point arguments
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const uint16_t *FPR = GetFPR();
static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned NumGPRs = array_lengthof(GPR_32);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// On PPC64, promote integers to 64-bit values.
if (isPPC64 && Arg.getValueType() == MVT::i32) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
}
// FIXME memcpy is used way more than necessary. Correctness first.
// Note: "by value" is code for passing a structure by value, not
// basic types.
if (Flags.isByVal()) {
unsigned Size = Flags.getByValSize();
// Very small objects are passed right-justified. Everything else is
// passed left-justified.
if (Size==1 || Size==2) {
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
} else {
SDValue Const = DAG.getConstant(PtrByteSize - Size,
PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
ArgOffset += PtrByteSize;
}
continue;
}
// Copy entire object into memory. There are cases where gcc-generated
// code assumes it is there, even if it could be put entirely into
// registers. (This is not what the doc says.)
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
CallSeqStart,
Flags, DAG, dl);
// For small aggregates (Darwin only) and aggregates >= PtrByteSize,
// copy the pieces of the object that fit into registers from the
// parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
} else {
ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
break;
}
}
continue;
}
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64:
if (FPR_idx != NumFPRs) {
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
if (isVarArg) {
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(), false, false,
false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
} else {
// If we have any FPRs remaining, we may also have GPRs remaining.
// Args passed in FPRs consume either 1 (f32) or 2 (f64) available
// GPRs.
if (GPR_idx != NumGPRs)
++GPR_idx;
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
!isPPC64) // PPC64 has 64-bit GPR's obviously :)
++GPR_idx;
}
} else
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
if (isPPC64)
ArgOffset += 8;
else
ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
// V registers; in fact gcc does this only for arguments that are
// prototyped, not for those that match the ellipsis. We do it for all
// arguments; this seems to work.
while (ArgOffset % 16 !=0) {
ArgOffset += PtrByteSize;
if (GPR_idx != NumGPRs)
GPR_idx++;
}
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
if (VR_idx != NumVRs) {
SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
break;
}
// Non-varargs Altivec params generally go in registers, but have
// stack space allocated at the end.
if (VR_idx != NumVRs) {
// Doesn't have GPR space allocated.
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else if (nAltivecParamsAtEnd==0) {
// We are emitting Altivec params in order.
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
break;
}
}
// If all Altivec parameters fit in registers, as they usually do,
// they get stack space following the non-Altivec parameters. We
// don't track this here because nobody below needs it.
// If there are more Altivec parameters than fit in registers emit
// the stores here.
if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
unsigned j = 0;
// Offset is aligned; skip the first 12 params, which go in V registers.
ArgOffset = ((ArgOffset+15)/16)*16;
ArgOffset += 12*16;
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
EVT ArgType = Outs[i].VT;
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
if (++j > NumVRs) {
SDValue PtrOff;
// We are emitting Altivec params in order.
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
!dyn_cast<ExternalSymbolSDNode>(Callee) &&
!isBLACompatibleAddress(Callee, DAG))
RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
PPC::R12), Callee));
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_PPC);
}
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[i];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
&RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
// When we pop the dynamic allocation we need to restore the SP link.
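// (Sketch of the idea: reload the back-chain word at 0(old SP), move the
// stack pointer, then re-store that word at 0(new SP), so the stack
// remains walkable across the restore.)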
SDLoc dl(Op);
// Get the correct type for pointers.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Construct the stack pointer operand.
bool isPPC64 = Subtarget.isPPC64();
unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
SDValue StackPtr = DAG.getRegister(SP, PtrVT);
// Get the operands for the STACKRESTORE.
SDValue Chain = Op.getOperand(0);
SDValue SaveSP = Op.getOperand(1);
// Load the old link SP.
SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
MachinePointerInfo(),
false, false, false, 0);
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
// Store the old link SP.
return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
false, false, 0);
}
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get the current return address save index.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
int RASI = FI->getReturnAddrSaveIndex();
// If the return address save index hasn't been defined yet.
if (!RASI) {
// Find out the fixed offset of the return address save area.
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for the return address save area.
RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
return DAG.getFrameIndex(RASI, PtrVT);
}
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
int FPSI = FI->getFramePointerSaveIndex();
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out the fixed offset of the frame pointer save area.
int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
return DAG.getFrameIndex(FPSI, PtrVT);
}
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDLoc dl(Op);
// Get the correct type for pointers.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
DAG.getConstant(0, PtrVT), Size);
// Construct a node for the frame pointer save index.
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
// Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
}
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel
/// instruction when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Not FP? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
// We might be able to do better than this under some circumstances, but in
// general, fsel-based lowering of select is a finite-math-only optimization.
// For more information, see section F.3 of the 2.06 ISA specification.
if (!DAG.getTarget().Options.NoInfsFPMath ||
!DAG.getTarget().Options.NoNaNsFPMath)
return Op;
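// fsel semantics, for reference: fsel FRT,FRA,FRC,FRB yields
// FRT = (FRA >= -0.0) ? FRC : FRB, so each handled condition below is
// rewritten as a sign test of LHS itself (when RHS is 0.0) or of the
// difference LHS - RHS.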
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
EVT CmpVT = Op.getOperand(0).getValueType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
SDValue Sel1;
if (isFloatingPointZero(RHS))
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
case ISD::SETEQ:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
case ISD::SETOGE:
case ISD::SETGE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
case ISD::SETOLE:
case ISD::SETLE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
}
SDValue Cmp;
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
case ISD::SETEQ:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
}
return Op;
}
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDLoc dl) const {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
(PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ,
dl, MVT::f64, Src);
break;
}
// Convert the FP value to an int value through memory.
bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
(Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
// Emit a store to the stack slot.
SDValue Chain;
if (i32Stack) {
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
MVT::i32, MMO);
} else
Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
MPI, false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
MPI = MachinePointerInfo();
}
return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
false, false, false, 0);
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDUS : PPCISD::FCFIDS) :
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDU : PPCISD::FCFID);
MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
MVT::f32 : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
// When converting to single-precision, we actually need to convert
// to double-precision first and then round to single-precision.
// To avoid double-rounding effects during that operation, we have
// to prepare the input operand. Bits that might be truncated when
// converting to double-precision are replaced by a bit that won't
// be lost at this stage, but is below the single-precision rounding
// position.
//
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
!PPCSubTarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
// is the case, we are guaranteed the value will fit into the 53 bit
// mantissa of an IEEE double-precision value without rounding.)
// If any of those low 11 bits were not zero originally, make sure
// bit 12 (value 2048) is set instead, so that the final rounding
// to single-precision gets the correct result.
SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
SINT, DAG.getConstant(2047, MVT::i64));
Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
Round, DAG.getConstant(2047, MVT::i64));
Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
Round = DAG.getNode(ISD::AND, dl, MVT::i64,
Round, DAG.getConstant(-2048, MVT::i64));
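// Sticky-bit example (illustrative): if SINT's low 11 bits are 0x401,
// 0x401 + 2047 carries into bit 11, so after the OR and the mask the
// result is (SINT & ~2047) | 2048 -- low bits cleared, bit 11 recording
// that something was truncated.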
// However, we cannot use that value unconditionally: if the magnitude
// of the input value is small, the bit-twiddling we did above might
// end up visibly changing the output. Fortunately, in that case, we
// don't need to twiddle bits since the original input will convert
// exactly to double-precision floating-point already. Therefore,
// construct a conditional to use the original value if the top 11
// bits are all sign-bit copies, and use the rounded value computed
// above otherwise.
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
SINT, DAG.getConstant(53, MVT::i32));
Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
Cond, DAG.getConstant(1, MVT::i64));
Cond = DAG.getSetCC(dl, MVT::i32,
Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
"Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Ld;
if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
MachinePointerInfo::getFixedStack(FrameIdx),
false, false, 0);
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad, 4, 4);
SDValue Ops[] = { Store, FIdx };
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, 2, MVT::i32, MMO);
} else {
assert(PPCSubTarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
Op.getOperand(0));
// STD the extended value into the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
MachinePointerInfo::getFixedStack(FrameIdx),
false, false, 0);
// Load the value as a double.
Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
MachinePointerInfo::getFixedStack(FrameIdx),
false, false, false, 0);
}
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
/*
The rounding mode is in bits 30:31 of the FPSCR, and has the following
settings:
00 Round to nearest
01 Round to 0
10 Round to +inf
11 Round to -inf
FLT_ROUNDS, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf
To perform the conversion, we do:
((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
*/
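// Quick check of the formula: RN=00 -> 0^(3>>1) == 1 (nearest),
// RN=01 -> 1^(2>>1) == 0 (to zero), RN=10 -> 2^(1>>1) == 2 (+inf),
// RN=11 -> 3^(0>>1) == 3 (-inf), matching the FLT_ROUNDS values above.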
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
EVT NodeTys[] = {
MVT::f64, // return register
MVT::Glue // unused in this context
};
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
StackSlot, MachinePointerInfo(), false, false,0);
// Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::AND, dl, MVT::i32,
CWD, DAG.getConstant(3, MVT::i32));
SDValue CWD2 =
DAG.getNode(ISD::SRL, dl, MVT::i32,
DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::XOR, dl, MVT::i32,
CWD, DAG.getConstant(3, MVT::i32)),
DAG.getConstant(3, MVT::i32)),
DAG.getConstant(1, MVT::i32));
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
SDLoc dl(Op);
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SHL!");
// Expand into a bunch of logical ops. Note that these ops
// depend on the PPC behavior for oversized shift amounts.
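// Sketch: slw/srw yield 0 for amounts 32-63, so for Amt < 32 the Tmp6
// term drops out and OutHi = (Hi << Amt) | (Lo >> (32-Amt)); for
// Amt > 32 the Tmp2/Tmp3 terms drop out and OutHi = Lo << (Amt-32);
// at Amt == 32 both paths contribute Lo. OutLo is just Lo << Amt.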
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
DAG.getConstant(-BitWidth, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, 2, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SRL!");
// Expand into a bunch of logical ops. Note that these ops
// depend on the PPC behavior for oversized shift amounts.
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
DAG.getConstant(-BitWidth, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, 2, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SRA!");
// Expand into a bunch of logical ops, followed by a select_cc.
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
DAG.getConstant(-BitWidth, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, 2, dl);
}
//===----------------------------------------------------------------------===//
// Vector related lowering.
//
/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
static const EVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
if (Val == -1)
SplatSize = 1;
EVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
SDValue Elt = DAG.getConstant(Val, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
&Ops[0], Ops.size());
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
SelectionDAG &DAG, SDLoc dl,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), Op);
}
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, SDLoc dl,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), LHS, RHS);
}
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDValue Op2, SelectionDAG &DAG,
SDLoc dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
EVT VT, SelectionDAG &DAG, SDLoc dl) {
// Force LHS/RHS to be the right type.
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
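// vsldoi concatenates LHS||RHS and extracts 16 bytes starting at byte Amt,
// so the shuffle mask below selects bytes Amt .. Amt+15.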
int Ops[16];
for (unsigned i = 0; i != 16; ++i)
Ops[i] = i + Amt;
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, true) || SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
unsigned SplatUndef = APSplatUndef.getZExtValue();
unsigned SplatSize = SplatBitSize / 8;
// First, handle single instruction cases.
// All zeros?
if (SplatBits == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
SDValue Z = DAG.getConstant(0, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
}
return Op;
}
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
if (SextVal >= -16 && SextVal <= 15)
return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
// If this value is in the range [17,31] and is odd, use:
// VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
// If this value is in the range [-31,-17] and is odd, use:
// VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
// Note the last two are three-instruction sequences.
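// For example: 30 = vsplti(15) + vsplti(15); 27 = vsplti(11) - vsplti(-16);
// and -27 = vsplti(-11) + vsplti(-16).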
if (SextVal >= -32 && SextVal <= 31) {
// To avoid having these optimizations undone by constant folding,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
EVT VT = Op.getValueType();
int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
SDValue EltSize = DAG.getConstant(Size, MVT::i32);
return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
// for fneg/fabs.
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1:
SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
// Make the VSLW intrinsic, computing 0x8000_0000.
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
OnesV, DAG, dl);
// xor by OnesV to invert it.
Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
};
for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
// Indirect through the SplatCsts array so that we favor 'vsplti -1' for
// cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1'
// is the preferred canonical form.
int i = SplatCsts[idx];
// Figure out what shift amount will be used by altivec if shifted by i in
// this splat size.
unsigned TypeShiftAmt = i & (SplatBitSize-1);
// vsplti + shl self.
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
Intrinsic::ppc_altivec_vslw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + srl self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
Intrinsic::ppc_altivec_vsrw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + sra self (note the arithmetic shift; the logical-shift form is
// the srl case above).
if (SextVal == (i >> TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
Intrinsic::ppc_altivec_vsraw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
Intrinsic::ppc_altivec_vrlw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// t = vsplti c, result = vsldoi t, t, 1
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
}
return SDValue();
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
SDLoc dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
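// PFEntry layout: bits [31:30] = cost, [29:26] = opcode, [25:13] = LHSID,
// [12:0] = RHSID; each ID packs four element selectors in base 9
// (0-7 select an input element, 8 means undef).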
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VMRGHW,
OP_VMRGLW,
OP_VSPLTISW0,
OP_VSPLTISW1,
OP_VSPLTISW2,
OP_VSPLTISW3,
OP_VSLDOI4,
OP_VSLDOI8,
OP_VSLDOI12
};
if (OpNum == OP_COPY) {
if (LHSID == (1*9+2)*9+3) return LHS;
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
int ShufIdxs[16];
switch (OpNum) {
default: llvm_unreachable("Unknown i32 permute!");
case OP_VMRGHW:
ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
break;
case OP_VMRGLW:
ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
break;
case OP_VSPLTISW0:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+0;
break;
case OP_VSPLTISW1:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+4;
break;
case OP_VSPLTISW2:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+8;
break;
case OP_VSPLTISW3:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+12;
break;
case OP_VSLDOI4:
return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
case OP_VSLDOI8:
return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
case OP_VSLDOI12:
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
}
EVT VT = OpLHS.getValueType();
OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
if (V2.getOpcode() == ISD::UNDEF) {
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
PPC::isVPKUWUMShuffleMask(SVOp, true) ||
PPC::isVPKUHUMShuffleMask(SVOp, true) ||
PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
return Op;
}
}
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
PPC::isVPKUHUMShuffleMask(SVOp, false) ||
PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
PPC::isVMRGHShuffleMask(SVOp, 4, false))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
ArrayRef<int> PermMask = SVOp->getMask();
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
unsigned EltNo = 8; // Start out undef.
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
if (PermMask[i*4+j] < 0)
continue; // Undef, ignore it.
unsigned ByteSource = PermMask[i*4+j];
if ((ByteSource & 3) != j) {
isFourElementShuffle = false;
break;
}
if (EltNo == 8) {
EltNo = ByteSource/4;
} else if (EltNo != ByteSource/4) {
isFourElementShuffle = false;
break;
}
}
PFIndexes[i] = EltNo;
}
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle vector to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
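// For example, the identity mask <0,1,2,3> yields PFIndexes = {0,1,2,3}
// and a table index of 0*729 + 1*81 + 2*9 + 3 = 102.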
if (isFourElementShuffle) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
// Determining when to avoid vperm is tricky. Many things affect the cost
// of vperm, particularly how many times the perm mask needs to be computed.
// For example, if the perm mask can be hoisted out of a loop or is already
// used (perhaps because there are multiple permutes with the same shuffle
// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
// the loop requires an extra register.
//
// As a compromise, we only emit discrete instructions if the shuffle can be
// generated in 3 or fewer operations. When we have loop information
// available, if this block is within a loop, we should avoid using vperm
// for 3-operation perms and use a constant pool load instead.
if (Cost < 3)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
// vector that will get spilled to the constant pool.
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
MVT::i32));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
}
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
default: return false;
// Comparison predicates.
case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
}
return true;
}
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
SDLoc dl(Op);
int CompareOpc;
bool isDot;
if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
if (!isDot) {
SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(CompareOpc, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
}
// Create the PPCISD altivec 'dot' comparison node.
SDValue Ops[] = {
Op.getOperand(2), // LHS
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
// Unpack the result based on how the target uses it.
unsigned BitNo; // Bit # of CR6.
bool InvertBit; // Invert result?
switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Return the value of the EQ bit of CR6.
BitNo = 0; InvertBit = false;
break;
case 1: // Return the inverted value of the EQ bit of CR6.
BitNo = 0; InvertBit = true;
break;
case 2: // Return the value of the LT bit of CR6.
BitNo = 2; InvertBit = false;
break;
case 3: // Return the inverted value of the LT bit of CR6.
BitNo = 2; InvertBit = true;
break;
}
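// CR6 sits at bits 24-27 of the condition register, i.e. bits 7:4 of the
// 32-bit MFOCRF result (counting from the LSB), so a right shift of
// 8-(3-BitNo) brings the selected field bit down to bit 0.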
// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
DAG.getConstant(8-(3-BitNo), MVT::i32));
// Isolate the bit.
Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
DAG.getConstant(1, MVT::i32));
// If we are supposed to, toggle the bit.
if (InvertBit)
Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
DAG.getConstant(1, MVT::i32));
return Flags;
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
EVT PtrVT = getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
Op.getOperand(0), FIdx, MachinePointerInfo(),
false, false, 0);
// Load it out.
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
false, false, false, 0);
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
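// Each 32-bit product is assembled from 16-bit halves:
//   a*b mod 2^32 = aLo*bLo + ((aLo*bHi + aHi*bLo) << 16).
// vmulouh supplies the aLo*bLo terms (the odd, i.e. low-order big-endian,
// halfwords), and vmsumuhm with RHS rotated by 16 supplies the cross terms.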
SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
SDValue RHSSwap = // = vrlw RHS, 16
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
// Shrinkify inputs to v8i16.
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
// Low parts multiplied together, generating 32-bit results (we ignore the
// top parts).
SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
LHS, RHS, DAG, dl, MVT::v4i32);
SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
// Shift the high parts up 16 bits.
HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
Neg16, DAG, dl);
return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
} else if (Op.getValueType() == MVT::v8i16) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
LHS, RHS, DAG, dl, MVT::v8i16);
EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
// Multiply the odd 8-bit parts, producing 16-bit sums.
SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
// Merge the results together.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
Ops[i*2 ] = 2*i+1;
Ops[i*2+1] = 2*i+1+16;
}
return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG, PPCSubTarget);
case ISD::VAARG:
return LowerVAARG(Op, DAG, PPCSubTarget);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG, PPCSubTarget);
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
SDLoc(Op));
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Lower 64-bit shifts.
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
// Vector-related lowering.
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
}
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
const TargetMachine &TM = getTargetMachine();
SDLoc dl(N);
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::INTRINSIC_W_CHAIN: {
if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
Intrinsic::ppc_is_decremented_ctr_nonzero)
break;
assert(N->getValueType(0) == MVT::i1 &&
"Unexpected result type for CTR decrement intrinsic");
EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
N->getOperand(1));
Results.push_back(NewInt);
Results.push_back(NewInt.getValue(1));
break;
}
case ISD::VAARG: {
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|| TM.getSubtarget<PPCSubtarget>().isPPC64())
return;
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
}
return;
}
case ISD::FP_ROUND_INREG: {
assert(N->getValueType(0) == MVT::ppcf128);
assert(N->getOperand(0).getValueType() == MVT::ppcf128);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(0));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(1));
// Add the two halves of the long double in round-to-zero mode.
SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
// We know the low half is about to be thrown away, so just use something
// convenient.
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
FPreg, FPreg));
return;
}
case ISD::FP_TO_SINT:
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
unsigned incr = MI->getOperand(3).getReg();
DebugLoc dl = MI->getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister(
is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
(const TargetRegisterClass *) &PPC::GPRCRegClass);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// l[wd]arx dest, ptr
// add r0, dest, incr
// st[wd]cx. r0, ptr
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
return BB;
}
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
unsigned incr = MI->getOperand(3).getReg();
DebugLoc dl = MI->getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
(const TargetRegisterClass *) &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// The 4-byte load must be aligned, while a char or short may be
// anywhere in the word. Hence all this nasty bookkeeping code.
// add ptr1, ptrA, ptrB [copy if ptrA==0]
// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
// xori shift, shift1, 24 [16]
// rlwinm ptr, ptr1, 0, 0, 29
// slw incr2, incr, shift
// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
// slw mask, mask2, shift
// loopMBB:
// lwarx tmpDest, ptr
// add tmp, tmpDest, incr2
// andc tmp2, tmpDest, mask
// and tmp3, tmp, mask
// or tmp4, tmp3, tmp2
// stwcx. tmp4, ptr
// bne- loopMBB
// fallthrough --> exitMBB
// srw dest, tmpDest, shift
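// For example, for a byte operand: shift1 = (ptr1 & 3) * 8, and the xori
// with 24 flips it into the big-endian lane shift (byte offset 0 -> 24,
// 1 -> 16, 2 -> 8, 3 -> 0); the bracketed values do the same for halfwords.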
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
.addReg(ptrA).addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
.addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
.addReg(incr).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
.addReg(Mask2Reg).addReg(ShiftReg);
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
.addReg(ZeroReg).addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
.addReg(Incr2Reg).addReg(TmpDestReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
.addReg(TmpDestReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
.addReg(ShiftReg);
return BB;
}
llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = MBB;
++I;
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
unsigned DstReg = MI->getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(RC->hasType(MVT::i32) && "Invalid destination!");
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
MVT PVT = getPointerTy();
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
// thisMBB:
// SjLjSetup mainMBB
// bl mainMBB
// v_restore = 1
// b sinkMBB
//
// mainMBB:
// buf[LabelOffset] = LR
// v_main = 0
//
// sinkMBB:
// v = phi(main, restore)
//
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// Note that the structure of the jmp_buf used here is not compatible
// with that used by libc, and is not designed to be. Specifically, it
// stores only those 'reserved' registers that LLVM does not otherwise
// understand how to spill. Also, by convention, by the time this
// intrinsic is called, Clang has already stored the frame address in the
// first slot of the buffer and stack address in the third. Following the
// X86 target code, we'll store the jump address in the second slot. We also
// need to save the TOC pointer (R2) to handle jumps between shared
// libraries, and that will be stored in the fourth slot. The thread
// identifier (R13) is not affected.
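// In pointer-sized slots, the layout is therefore: buf[0] = frame address,
// buf[1] = jump (label) address, buf[2] = stack address, buf[3] = TOC (R2),
// buf[4] = base pointer.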
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
const int64_t BPOffset = 4 * PVT.getStoreSize();
// Prepare the label address (IP) in a register.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
unsigned BufReg = MI->getOperand(1).getReg();
if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
}
// Naked functions never have a base pointer, and so we use r1. For all
// other functions, this decision must be delayed until PEI.
unsigned BaseReg;
if (MF->getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
else
BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
const PPCRegisterInfo *TRI =
static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
.addMBB(mainMBB);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
thisMBB->addSuccessor(mainMBB, /* weight */ 0);
thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
// mainMBB:
// mainDstReg = 0
MIB = BuildMI(mainMBB, DL,
TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
if (PPCSubTarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
.addReg(LabelReg)
.addImm(LabelOffset)
.addReg(BufReg);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(PPC::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(thisMBB);
MI->eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
MVT PVT = getPointerTy();
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as a GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
const int64_t BPOffset = 4 * PVT.getStoreSize();
unsigned BufReg = MI->getOperand(0).getReg();
// Reload FP (the jumped-to function may not have had a
// frame pointer, and if so, then its r31 will be restored
// as necessary).
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
.addImm(0)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
.addImm(0)
.addReg(BufReg);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
.addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
.addImm(LabelOffset)
.addReg(BufReg);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
.addImm(SPOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
.addImm(SPOffset)
.addReg(BufReg);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload BP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
.addImm(BPOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
.addImm(BPOffset)
.addReg(BufReg);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
}
// Jump
BuildMI(*MBB, MI, DL,
TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
MI->eraseFromParent();
return MBB;
}
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
} else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
return emitEHSjLjLongJmp(MI, BB);
}
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
MachineFunction *F = BB->getParent();
if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8)) {
SmallVector<MachineOperand, 2> Cond;
Cond.push_back(MI->getOperand(4));
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
MI->getOpcode() == PPC::SELECT_CC_VRRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl,
TII->get(PPC::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
}
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
BB = EmitAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
unsigned oldval = MI->getOperand(3).getReg();
unsigned newval = MI->getOperand(4).getReg();
DebugLoc dl = MI->getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loop1MBB);
F->insert(It, loop2MBB);
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
// fallthrough --> loop1MBB
BB->addSuccessor(loop1MBB);
// loop1MBB:
// l[wd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
// st[wd]cx. newval, ptr
// bne- loop1MBB
// b exitBB
// midMBB:
// st[wd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
} else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
bool is64bit = PPCSubTarget.isPPC64();
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
unsigned oldval = MI->getOperand(3).getReg();
unsigned newval = MI->getOperand(4).getReg();
DebugLoc dl = MI->getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loop1MBB);
F->insert(It, loop2MBB);
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
(const TargetRegisterClass *) &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loop1MBB
BB->addSuccessor(loop1MBB);
// The 4-byte load must be aligned, while a char or short may be
// anywhere in the word. Hence all this nasty bookkeeping code.
// add ptr1, ptrA, ptrB [copy if ptrA==0]
// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
// xori shift, shift1, 24 [16]
// rlwinm ptr, ptr1, 0, 0, 29
// slw newval2, newval, shift
// slw oldval2, oldval, shift
// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
// slw mask, mask2, shift
// and newval3, newval2, mask
// and oldval3, oldval2, mask
// loop1MBB:
// lwarx tmpDest, ptr
// and tmp, tmpDest, mask
// cmpw tmp, oldval3
// bne- midMBB
// loop2MBB:
// andc tmp2, tmpDest, mask
// or tmp4, tmp2, newval3
// stwcx. tmp4, ptr
// bne- loop1MBB
// b exitBB
// midMBB:
// stwcx. tmpDest, ptr
// exitBB:
// srw dest, tmpDest, shift
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
.addReg(ptrA).addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
.addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
.addReg(newval).addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
.addReg(oldval).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
.addReg(Mask3Reg).addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
.addReg(Mask2Reg).addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
.addReg(NewVal2Reg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
.addReg(OldVal2Reg).addReg(MaskReg);
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
.addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
.addReg(TmpDestReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
.addReg(TmpReg).addReg(OldVal3Reg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
.addReg(TmpDestReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
.addReg(Tmp2Reg).addReg(NewVal3Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
.addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
.addReg(ZeroReg).addReg(PtrReg);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
.addReg(ShiftReg);
} else if (MI->getOpcode() == PPC::FADDrtz) {
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
// is not modeled at the SelectionDAG level.
unsigned Dest = MI->getOperand(0).getReg();
unsigned Src1 = MI->getOperand(1).getReg();
unsigned Src2 = MI->getOperand(2).getReg();
DebugLoc dl = MI->getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
// Set rounding mode to round-to-zero.
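// FPSCR bits 30-31 form the RN field; setting bit 31 and clearing bit 30
// selects RN = 0b01, i.e. round toward zero.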
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
// Perform addition.
BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
DAGCombinerInfo &DCI) const {
if (DCI.isAfterLegalizeVectorOps())
return SDValue();
EVT VT = Op.getValueType();
if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
(VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
// F(X) = A X - 1 [which has a zero at X = 1/A]
// =>
// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
// does not require additional intermediate precision]
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
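// For example, with hasRecipPrec() the 2^-14 estimate reaches ~28 bits
// after one iteration (enough for f32) and ~56 bits after two (enough for
// f64), which is why f64 gets one extra iteration below.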
int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(Op);
SDValue FPOne =
DAG.getConstantFP(1.0, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 &&
"Unknown vector type");
FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
FPOne, FPOne, FPOne, FPOne);
}
SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
DCI.AddToWorklist(Est.getNode());
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
DCI.AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
DCI.AddToWorklist(Est.getNode());
}
return Est;
}
return SDValue();
}
SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
DAGCombinerInfo &DCI) const {
if (DCI.isAfterLegalizeVectorOps())
return SDValue();
EVT VT = Op.getValueType();
if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
// =>
// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
// As a result, we precompute A/2 prior to the iteration loop.
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
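// The same doubling argument applies here: without hasRecipPrec(), the
// 2^-5 estimate needs three iterations for f32 (~40 bits) and four for
// f64 (~80 bits), matching the counts below.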
int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(Op);
SDValue FPThreeHalves =
DAG.getConstantFP(1.5, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 &&
"Unknown vector type");
FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
FPThreeHalves, FPThreeHalves,
FPThreeHalves, FPThreeHalves);
}
SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
DCI.AddToWorklist(Est.getNode());
// We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
// this entire sequence requires only one FP constant.
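// (FPThreeHalves * Arg - Arg = (1.5 - 1.0) * Arg = Arg / 2, reusing the
// 1.5 constant already materialized for the iteration.)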
SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
DCI.AddToWorklist(HalfArg.getNode());
HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
DCI.AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
DCI.AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
DCI.AddToWorklist(Est.getNode());
}
return Est;
}
return SDValue();
}
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
EVT VT = LS->getMemoryVT();
if (VT.getSizeInBits() / 8 != Bytes)
return false;
SDValue Loc = LS->getBasePtr();
SDValue BaseLoc = Base->getBasePtr();
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
int FS = MFI->getObjectSize(FI);
int BFS = MFI->getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
// Handle X+C
if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const GlobalValue *GV1 = NULL;
const GlobalValue *GV2 = NULL;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
return Offset1 == (Offset2 + Dist*Bytes);
return false;
}
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true
// result indicates that it is safe to create a new consecutive load adjacent
// to the load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
SDValue Chain = LD->getChain();
EVT VT = LD->getMemoryVT();
SmallSet<SDNode *, 16> LoadRoots;
SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
SmallSet<SDNode *, 16> Visited;
// First, search up the chain, branching to follow all token-factor operands.
// If we find a consecutive load, then we're done; otherwise, record all
// nodes just above the top-level loads and token factors.
while (!Queue.empty()) {
SDNode *ChainNext = Queue.pop_back_val();
if (!Visited.insert(ChainNext))
continue;
if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
for (SDNode::op_iterator O = ChainNext->op_begin(),
OE = ChainNext->op_end(); O != OE; ++O)
if (!Visited.count(O->getNode()))
Queue.push_back(O->getNode());
} else
LoadRoots.insert(ChainNext);
}
// Second, search down the chain, starting from the top-level nodes recorded
// in the first phase. These top-level nodes are the nodes just above all
// loads and token factors. Starting with their uses, recursively look through
// all loads (just the chain uses) and token factors to find a consecutive
// load.
Visited.clear();
Queue.clear();
for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
IE = LoadRoots.end(); I != IE; ++I) {
Queue.push_back(*I);
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
if (!Visited.insert(LoadRoot))
continue;
if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
for (SDNode::use_iterator UI = LoadRoot->use_begin(),
UE = LoadRoot->use_end(); UI != UE; ++UI)
if (((isa<LoadSDNode>(*UI) &&
cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
Queue.push_back(*UI);
}
}
return false;
}
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
case PPCISD::SHL:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
if (C->isNullValue()) // 0 << V -> 0.
return N->getOperand(0);
}
break;
case PPCISD::SRL:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
if (C->isNullValue()) // 0 >>u V -> 0.
return N->getOperand(0);
}
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
if (C->isNullValue() || // 0 >>s V -> 0.
C->isAllOnesValue()) // -1 >>s V -> -1.
return N->getOperand(0);
}
break;
case ISD::FDIV: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
}
} else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
}
} else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
N->getOperand(1).getOperand(1));
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
}
}
SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
}
}
break;
case ISD::FSQRT: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
// Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
// reciprocal sqrt.
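// (That is, sqrt(X) = 1 / (1 / sqrt(X)): refine an rsqrt estimate first,
// then refine a reciprocal estimate of that result.)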
SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
if (RV.getNode() != 0) {
// Unfortunately, RV is now NaN if the input was exactly 0. Select out
// this case and force the answer to 0.
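// (This is because the rsqrt estimate of 0 is +Inf, and refining the
// reciprocal of +Inf multiplies Inf by a ~0 estimate, yielding NaN.)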
EVT VT = RV.getValueType();
SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
}
SDValue ZeroCmp =
DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
DCI.AddToWorklist(ZeroCmp.getNode());
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
ZeroCmp, Zero, RV);
return RV;
}
}
}
break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
// We allow the src/dst to be either f32/f64, but the intermediate
// type must be i64.
if (N->getOperand(0).getValueType() == MVT::i64 &&
N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
SDValue Val = N->getOperand(0).getOperand(0);
if (Val.getValueType() == MVT::f32) {
Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
}
Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
if (N->getValueType(0) == MVT::f32) {
Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
DAG.getIntPtrConstant(0));
DCI.AddToWorklist(Val.getNode());
}
return Val;
} else if (N->getOperand(0).getValueType() == MVT::i32) {
// If the intermediate type is i32, we can avoid the load/store here
// too.
}
}
}
break;
case ISD::STORE:
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
!cast<StoreSDNode>(N)->isTruncatingStore() &&
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
N->getOperand(1).getValueType() == MVT::i32 &&
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
SDValue Val = N->getOperand(1).getOperand(0);
if (Val.getValueType() == MVT::f32) {
Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
}
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
SDValue Ops[] = {
N->getOperand(0), Val, N->getOperand(2),
DAG.getValueType(N->getOperand(1).getValueType())
};
Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
if (cast<StoreSDNode>(N)->isUnindexed() &&
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16 ||
(TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
TM.getSubtarget<PPCSubtarget>().isPPC64() &&
N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
SDValue Ops[] = {
N->getOperand(0), BSwapOp, N->getOperand(2),
DAG.getValueType(N->getOperand(1).getValueType())
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
Ops, array_lengthof(Ops),
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT VT = LD->getValueType(0);
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
// https://developer.apple.com/hardwaredrivers/ve/alignment.html
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
// loads into an alignment-based permutation-control instruction (lvsl),
// a series of regular vector loads (which always truncate their
// input address to an aligned address), and a series of permutations.
// The results of these permutations are the requested loaded values.
// The trick is that the last "extra" load is not taken from the address
// you might suspect (sizeof(vector) bytes after the last requested
// load), but rather sizeof(vector) - 1 bytes after the last
// requested vector. The point of this is to avoid a page fault if the
// base address happened to be aligned. This works because if the base
// address is aligned, then adding less than a full vector length will
// cause the last vector in the sequence to be (re)loaded. Otherwise,
// the next vector will be fetched from the address you would expect.
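// For example (illustrative): a 16-byte load from 0x1003 becomes aligned
// loads from 0x1000 and from 0x1003 + 15 = 0x1012 (truncated to 0x1010),
// followed by a permute. Had the base been the aligned 0x1000, the extra
// load address 0x100F would truncate back to 0x1000, so no byte beyond
// the requested range (and no new page) is ever touched.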
// We might be able to reuse the permutation control generated for a
// different base address that differs from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
DAG, dl, MVT::v16i8);
// Refine the alignment of the original load (a "new" load created here,
// identical to the first except for the alignment, would be merged with
// the existing node regardless).
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(LD->getPointerInfo(),
LD->getMemOperand()->getFlags(),
LD->getMemoryVT().getStoreSize(),
ABIAlignment);
LD->refineAlignment(MMO);
SDValue BaseLoad = SDValue(LD, 0);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
// alignment), and the value of IncValue (which is actually used to
// increment the pointer value) are different! This is because we
// require the next load to appear to be aligned, even though it
// is actually offset from the base pointer by a lesser amount.
int IncOffset = VT.getSizeInBits() / 8;
int IncValue = IncOffset;
// Walk (both up and down) the chain looking for another load at the real
// (aligned) offset (the alignment of the other load does not matter in
// this case). If found, then do not use the offset reduction trick, as
// that will prevent the loads from being later combined (as they would
// otherwise be duplicates).
if (!findConsecutiveLoad(LD, DAG))
--IncValue;
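// (Concretely: for a v4i32 load, IncOffset is 16 while IncValue becomes 15
// whenever no neighboring consecutive load is found.)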
SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
SDValue ExtraLoad =
DAG.getLoad(VT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncOffset),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), ABIAlignment);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
if (BaseLoad.getValueType() != MVT::v4i32)
BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
if (ExtraLoad.getValueType() != MVT::v4i32)
ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
// Now we need to be really careful about how we update the users of the
// original load. We cannot just call DCI.CombineTo (or
// DAG.ReplaceAllUsesWith for that matter), because the load still has
// uses created here (the permutation for example) that need to stay.
SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
while (UI != UE) {
SDUse &Use = UI.getUse();
SDNode *User = *UI;
// Note: BaseLoad is checked here because it might not be N, but a
// bitcast of N.
if (User == Perm.getNode() || User == BaseLoad.getNode() ||
User == TF.getNode() || Use.getResNo() > 1) {
++UI;
continue;
}
SDValue To = Use.getResNo() ? TF : Perm;
++UI;
SmallVector<SDValue, 8> Ops;
for (SDNode::op_iterator O = User->op_begin(),
OE = User->op_end(); O != OE; ++O) {
if (*O == Use)
Ops.push_back(To);
else
Ops.push_back(*O);
}
DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
}
return SDValue(N, 0);
}
}
break;
case ISD::INTRINSIC_WO_CHAIN:
if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
Intrinsic::ppc_altivec_lvsl &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
if (DAG.MaskedValueIsZero(Add->getOperand(1),
APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
Add.getValueType().getScalarType().getSizeInBits()))) {
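// (lvsl depends only on the low four bits of its address; adding an amount
// whose low four bits are known zero cannot change the permute control.)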
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode::use_iterator UI = BasePtr->use_begin(),
UE = BasePtr->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
Intrinsic::ppc_altivec_lvsl) {
// We've found another LVSL, and this address differs from that one by
// an aligned amount (a multiple of 16 bytes). The results will be the
// same, so use the one we've just found instead.
return SDValue(*UI, 0);
}
}
}
}
break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
(TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
TM.getSubtarget<PPCSubtarget>().isPPC64() &&
N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(N->getValueType(0)) // VT
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
if (N->getValueType(0) == MVT::i16)
ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
// First, combine the bswap away. This makes the value produced by the
// load dead.
DCI.CombineTo(N, ResVal);
// Next, combine the load away; we give it a bogus result value but a real
// chain result. The result value is dead because the bswap is dead.
DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
break;
case PPCISD::VCMP: {
// If a VCMPo node already exists with exactly the same operands as this
// node, use its result instead of this node (VCMPo computes both a CR6 and
// a normal output).
//
if (!N->getOperand(0).hasOneUse() &&
!N->getOperand(1).hasOneUse() &&
!N->getOperand(2).hasOneUse()) {
// Scan all of the users of the LHS, looking for VCMPo's that match.
SDNode *VCMPoNode = 0;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
UI != E; ++UI)
if (UI->getOpcode() == PPCISD::VCMPo &&
UI->getOperand(1) == N->getOperand(1) &&
UI->getOperand(2) == N->getOperand(2) &&
UI->getOperand(0) == N->getOperand(0)) {
VCMPoNode = *UI;
break;
}
// If there is no VCMPo node, or if its flag result is unused, don't
// transform this.
if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
break;
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
SDNode *FlagUser = 0;
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
FlagUser == 0; ++UI) {
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
FlagUser = User;
break;
}
}
}
// If the user is a MFOCRF instruction, we know this is safe.
// Otherwise we give up for right now.
if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPoNode, 0);
}
break;
}
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
// that we don't have to do a MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
// Sometimes the promoted value of the intrinsic is ANDed with some non-zero
// value. If so, look through the AND to get to the intrinsic.
if (LHS.getOpcode() == ISD::AND &&
LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
Intrinsic::ppc_is_decremented_ctr_nonzero &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
!cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
isZero())
LHS = LHS.getOperand(0);
if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
Intrinsic::ppc_is_decremented_ctr_nonzero &&
isa<ConstantSDNode>(RHS)) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
"Counter decrement comparison is not EQ or NE");
unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
bool isBDNZ = (CC == ISD::SETEQ && Val) ||
(CC == ISD::SETNE && !Val);
// We now need to make the intrinsic dead (it cannot be instruction
// selected).
DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
assert(LHS.getNode()->hasOneUse() &&
"Counter decrement has more than one use");
return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
N->getOperand(0), N->getOperand(4));
}
int CompareOpc;
bool isDot;
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
// that the condition is never/always true.
unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
if (Val != 0 && Val != 1) {
if (CC == ISD::SETEQ) // Cond never true, remove branch.
return N->getOperand(0);
// Always !=, turn it into an unconditional branch.
return DAG.getNode(ISD::BR, dl, MVT::Other,
N->getOperand(0), N->getOperand(4));
}
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
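// (BranchOnWhenPredTrue is set exactly for (SETEQ, Val == 1) and
// (SETNE, Val == 0), i.e. whenever the branch is taken if the predicate
// bit in CR6 is set.)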
// Create the PPCISD altivec 'dot' comparison node.
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Branch on the value of the EQ bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
break;
case 1: // Branch on the inverted value of the EQ bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
break;
case 2: // Branch on the value of the LT bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
break;
case 3: // Branch on the inverted value of the LT bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
break;
}
return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
DAG.getConstant(CompOpc, MVT::i32),
DAG.getRegister(PPC::CR6, MVT::i32),
N->getOperand(4), CompNode.getValue(1));
}
break;
}
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case PPCISD::LBRX: {
// lhbrx is known to have the top bits cleared out.
if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
KnownZero = 0xFFFF0000;
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
default: break;
case Intrinsic::ppc_altivec_vcmpbfp_p:
case Intrinsic::ppc_altivec_vcmpeqfp_p:
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
}
}
}
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'b':
case 'r':
case 'f':
case 'v':
case 'y':
return C_RegisterClass;
case 'Z':
// FIXME: While Z does indicate a memory constraint, it specifically
// indicates an r+r address (used in conjunction with the 'y' modifier
// in the replacement string). Currently, we're forcing the base
// register to be r0 in the asm printer (which is interpreted as zero)
// and forming the complete address in the second register. This is
// suboptimal.
return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'b':
if (type->isIntegerTy())
weight = CW_Register;
break;
case 'f':
if (type->isFloatTy())
weight = CW_Register;
break;
case 'd':
if (type->isDoubleTy())
weight = CW_Register;
break;
case 'v':
if (type->isVectorTy())
weight = CW_Register;
break;
case 'y':
weight = CW_Register;
break;
case 'Z':
weight = CW_Memory;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
break;
case 'v':
return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
}
std::pair<unsigned, const TargetRegisterClass*> R =
TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
// (which we call X[0-9]+). If a 64-bit value has been requested, and a
// 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
}
return R;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0,0);
// Only support length 1 constraints.
if (Constraint.length() > 1) return;
char Letter = Constraint[0];
switch (Letter) {
default: break;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P': {
ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
if (!CST) return; // Must be an immediate to match.
unsigned Value = CST->getZExtValue();
switch (Letter) {
default: llvm_unreachable("Unknown constraint letter!");
case 'I': // "I" is a signed 16-bit constant.
if ((short)Value == (int)Value)
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
if ((short)Value == 0)
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
if ((Value >> 16) == 0)
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
case 'M': // "M" is a constant that is greater than 31.
if (Value > 31)
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
case 'N': // "N" is a positive constant that is an exact power of two.
if ((int)Value > 0 && isPowerOf2_32(Value))
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
case 'O': // "O" is the constant zero.
if (Value == 0)
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
if ((short)-Value == (int)-Value)
Result = DAG.getTargetConstant(Value, Op.getValueType());
break;
}
break;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
// Handle standard constraint letters.
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
// FIXME: PPC does not allow r+i addressing modes for vectors!
// PPC allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// PPC only supports r+r addressing.
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
return false;
// Otherwise we have r+r or r+i.
break;
case 2:
if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
return false;
// Allow 2*r as r+r.
break;
default:
// No other scales are supported.
return false;
}
return true;
}
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// Make sure the function does not optimize away the store of the RA to
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
isPPC64? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
// Naked functions never have a frame pointer, and so we use r1. For all
// other functions, this decision must be delayed until PEI.
unsigned FrameReg;
if (MF.getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
else
FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
FrameAddr, MachinePointerInfo(), false, false,
false, 0);
return FrameAddr;
}
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
return false;
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means the destination alignment can
/// satisfy any constraint. Similarly, if SrcAlign is zero, it means there is
/// no need to check it against an alignment requirement, probably because
/// the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
return PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
}
bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
// PowerPC supports unaligned memory access for simple non-vector types.
// Although accessing unaligned addresses is not as efficient as accessing
// aligned addresses, it is generally more efficient than manual expansion,
// and it generally only traps to software emulation when crossing page
// boundaries.
if (!VT.isSimple())
return false;
if (VT.getSimpleVT().isVector())
return false;
if (VT == MVT::ppcf128)
return false;
if (Fast)
*Fast = true;
return true;
}
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
}
// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const {
return PPC::createFastISel(FuncInfo, LibInfo);
}
Index: head/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (revision 265925)
@@ -1,1532 +1,1536 @@
//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PowerPC implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
using namespace llvm;
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
cl::desc("Disable analysis for CTR loops"));
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCScoreboardHazardRecognizer(II, DAG);
}
return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
assert(TM.getInstrInfo() && "No InstrInfo?");
return new PPCHazardRecognizer970(TM);
}
return new PPCScoreboardHazardRecognizer(II, DAG);
}
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: return false;
case PPC::EXTSW:
case PPC::EXTSW_32_64:
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
SubIdx = PPC::sub_32;
return true;
}
}
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
// Note: This list must be kept consistent with LoadRegFromStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::LD:
case PPC::LWZ:
case PPC::LFS:
case PPC::LFD:
case PPC::RESTORE_CR:
case PPC::LVX:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
return MI->getOperand(0).getReg();
}
break;
}
return 0;
}
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
// Note: This list must be kept consistent with StoreRegToStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::STD:
case PPC::STW:
case PPC::STFS:
case PPC::STFD:
case PPC::SPILL_CR:
case PPC::STVX:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
return MI->getOperand(0).getReg();
}
break;
}
return 0;
}
// commuteInstruction - We can commute rlwimi instructions, but only if the
// rotate amt is zero. We also have to munge the immediates a bit.
MachineInstr *
PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MachineFunction &MF = *MI->getParent()->getParent();
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI &&
MI->getOpcode() != PPC::RLWIMIo)
return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
return 0;
// If we have a zero rotate count, we have:
// M = mask(MB,ME)
// Op0 = (Op1 & ~M) | (Op2 & M)
// Change this to:
// M = mask((ME+1)&31, (MB-1)&31)
// Op0 = (Op2 & ~M) | (Op1 & M)
// Swap op1/op2
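// (Note: PPC rotate masks wrap around, so mask((ME+1)&31, (MB-1)&31) is the
// bitwise complement of mask(MB, ME); e.g. for MB=16, ME=23 the original
// mask covers bits 16..23 and mask(24, 15) covers exactly the other bits,
// which is why swapping the operands preserves the result.)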
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
unsigned Reg2 = MI->getOperand(2).getReg();
bool Reg1IsKill = MI->getOperand(1).isKill();
bool Reg2IsKill = MI->getOperand(2).isKill();
bool ChangeReg0 = false;
// If machine instrs are no longer in two-address forms, update
// destination register as well.
if (Reg0 == Reg1) {
// Must be a two-address instruction!
assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
"Expecting a two-address instruction!");
Reg2IsKill = false;
ChangeReg0 = true;
}
// Masks.
unsigned MB = MI->getOperand(4).getImm();
unsigned ME = MI->getOperand(5).getImm();
if (NewMI) {
// Create a new instruction.
unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
bool Reg0IsDead = MI->getOperand(0).isDead();
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
.addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
.addReg(Reg2, getKillRegState(Reg2IsKill))
.addReg(Reg1, getKillRegState(Reg1IsKill))
.addImm((ME+1) & 31)
.addImm((MB-1) & 31);
}
if (ChangeReg0)
MI->getOperand(0).setReg(Reg2);
MI->getOperand(2).setReg(Reg1);
MI->getOperand(1).setReg(Reg2);
MI->getOperand(2).setIsKill(Reg1IsKill);
MI->getOperand(1).setIsKill(Reg2IsKill);
// Swap the mask around.
MI->getOperand(4).setImm((ME+1) & 31);
MI->getOperand(5).setImm((MB-1) & 31);
return MI;
}
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
BuildMI(MBB, MI, DL, get(PPC::NOP));
}
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return false;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return false;
--I;
}
if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (LastInst->getOpcode() == PPC::B) {
if (!LastInst->getOperand(0).isMBB())
return true;
TBB = LastInst->getOperand(0).getMBB();
return false;
} else if (LastInst->getOpcode() == PPC::BCC) {
if (!LastInst->getOperand(2).isMBB())
return true;
// Block ends with fall-through condbranch.
TBB = LastInst->getOperand(2).getMBB();
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
} else if (LastInst->getOpcode() == PPC::BDNZ8 ||
LastInst->getOpcode() == PPC::BDNZ) {
if (!LastInst->getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(1));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
return false;
} else if (LastInst->getOpcode() == PPC::BDZ8 ||
LastInst->getOpcode() == PPC::BDZ) {
if (!LastInst->getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
return false;
}
// Otherwise, don't know what this is.
return true;
}
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = I;
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() &&
isUnpredicatedTerminator(--I))
return true;
// If the block ends with PPC::B and PPC::BCC, handle it.
if (SecondLastInst->getOpcode() == PPC::BCC &&
LastInst->getOpcode() == PPC::B) {
if (!SecondLastInst->getOperand(2).isMBB() ||
!LastInst->getOperand(0).isMBB())
return true;
TBB = SecondLastInst->getOperand(2).getMBB();
Cond.push_back(SecondLastInst->getOperand(0));
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
} else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
SecondLastInst->getOpcode() == PPC::BDNZ) &&
LastInst->getOpcode() == PPC::B) {
if (!SecondLastInst->getOperand(0).isMBB() ||
!LastInst->getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(1));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
FBB = LastInst->getOperand(0).getMBB();
return false;
} else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
SecondLastInst->getOpcode() == PPC::BDZ) &&
LastInst->getOpcode() == PPC::B) {
if (!SecondLastInst->getOperand(0).isMBB() ||
!LastInst->getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// If the block ends with two PPC::Bs, handle it. The second one is not
// executed, so remove it.
if (SecondLastInst->getOpcode() == PPC::B &&
LastInst->getOpcode() == PPC::B) {
if (!SecondLastInst->getOperand(0).isMBB())
return true;
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
I->eraseFromParent();
return false;
}
// Otherwise, can't handle this.
return true;
}
unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return 0;
--I;
}
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 0;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (I->getOpcode() != PPC::BCC &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 1;
// Remove the branch.
I->eraseFromParent();
return 2;
}
unsigned
PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
BuildMI(&MBB, DL, get(Cond[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
BuildMI(&MBB, DL, get(Cond[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else
BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
if (!TM.getSubtargetImpl()->hasISEL())
return false;
if (Cond.size() != 2)
return false;
// If this is really a bdnz-like condition, then it cannot be turned into a
// select.
if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
return false;
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
if (!RC)
return false;
// isel is for regular integer GPRs only.
if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
!PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
!PPC::G8RCRegClass.hasSubClassEq(RC) &&
!PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
return false;
// FIXME: These numbers are for the A2; how well they work for other cores is
// an open question. On the A2, the isel instruction has a 2-cycle latency
// but single-cycle throughput. These numbers are used in combination with
// the MispredictPenalty setting from the active SchedMachineModel.
CondCycles = 1;
TrueCycles = 1;
FalseCycles = 1;
return true;
}
void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc dl,
unsigned DestReg,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg) const {
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
assert(TM.getSubtargetImpl()->hasISEL() &&
"Cannot insert select on target without ISEL support");
// Get the register classes.
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
assert(RC && "TrueReg and FalseReg must have overlapping register classes");
bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
assert((Is64Bit ||
PPC::GPRCRegClass.hasSubClassEq(RC) ||
PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
"isel is for regular integer GPRs only");
unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
unsigned SelectPred = Cond[0].getImm();
unsigned SubIdx;
bool SwapOps;
switch (SelectPred) {
default: llvm_unreachable("invalid predicate for isel");
case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
}
unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
SecondReg = SwapOps ? TrueReg : FalseReg;
// The first input register of isel cannot be r0. If it is a member
// of a register class that may include r0, then copy it first (the
// register allocator should eliminate the copy).
if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
const TargetRegisterClass *FirstRC =
MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
&PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
unsigned OldFirstReg = FirstReg;
FirstReg = MRI.createVirtualRegister(FirstRC);
BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
.addReg(OldFirstReg);
}
BuildMI(MBB, MI, dl, get(OpCode), DestReg)
.addReg(FirstReg).addReg(SecondReg)
.addReg(Cond[1].getReg(), 0, SubIdx);
}
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR8;
else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
Opc = PPC::FMR;
else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::MCRF;
else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::VOR;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
llvm_unreachable("Impossible reg-to-reg copy");
const MCInstrDesc &MCID = get(Opc);
if (MCID.getNumOperands() == 3)
BuildMI(MBB, I, DL, MCID, DestReg)
.addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
else
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
// This function returns true if a CR spill is necessary and false otherwise.
bool
PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const{
// Note: If additional store instructions are added here,
// update isStoreToStackSlot.
DebugLoc DL;
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
// not cause any bug. If we need other uses of CR bits, the following
// code may be invalid.
unsigned Reg = 0;
if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
Reg = PPC::CR0;
else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
Reg = PPC::CR1;
else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
Reg = PPC::CR2;
else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
Reg = PPC::CR3;
else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
Reg = PPC::CR4;
else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
Reg = PPC::CR5;
else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
Reg = PPC::CR6;
else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
&PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
return false;
}
void
PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
bool NonRI = false, SpillsVRS = false;
if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
if (SpillsVRS)
FuncInfo->setSpillsVRSAVE();
if (NonRI)
FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const {
// Note: If additional load instructions are added here,
// update isLoadFromStackSlot.
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
} else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
get(PPC::RESTORE_CR), DestReg),
FrameIdx));
return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
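// As in the store path, map the CR bit to its containing CR field and
// restore the whole field.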
unsigned Reg = 0;
if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
Reg = PPC::CR0;
else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
Reg = PPC::CR1;
else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
Reg = PPC::CR2;
else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
Reg = PPC::CR3;
else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
Reg = PPC::CR4;
else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
Reg = PPC::CR5;
else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
Reg = PPC::CR6;
else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
&PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
get(PPC::RESTORE_VRSAVE),
DestReg),
FrameIdx));
SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
return false;
}
void
PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
bool NonRI = false, SpillsVRS = false;
if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
if (SpillsVRS)
FuncInfo->setSpillsVRSAVE();
if (NonRI)
FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
else
// Leave the CR# the same, but invert the condition.
Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
return false;
}
bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const {
// For some instructions, it is legal to fold ZERO into the RA register field.
// A zero immediate should always be loaded with a single li.
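// (On PowerPC, r0, or x0 in 64-bit mode, reads as the constant 0 when it
// appears in the RA position of such instructions, which is what makes this
// fold legal.)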
unsigned DefOpc = DefMI->getOpcode();
if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
return false;
if (!DefMI->getOperand(1).isImm())
return false;
if (DefMI->getOperand(1).getImm() != 0)
return false;
// Note that we cannot invert the arguments of an isel here in order to fold
// a ZERO into what is presented as the second argument. All we have here
// is the condition bit, and that might come from a CR-logical bit operation.
const MCInstrDesc &UseMCID = UseMI->getDesc();
// Only fold into real machine instructions.
if (UseMCID.isPseudo())
return false;
unsigned UseIdx;
for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx)
if (UseMI->getOperand(UseIdx).isReg() &&
UseMI->getOperand(UseIdx).getReg() == Reg)
break;
assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI");
assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
// We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
// register (which might also be specified as a pointer class kind).
if (UseInfo->isLookupPtrRegClass()) {
if (UseInfo->RegClass /* Kind */ != 1)
return false;
} else {
if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
return false;
}
// Make sure this is not tied to an output register (or otherwise
// constrained). This is true for the RA operand of ST?UX instructions, for
// example, which is tied to the output (updated address) register.
if (UseInfo->Constraints != 0)
return false;
unsigned ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
} else {
ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
PPC::ZERO8 : PPC::ZERO;
}
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI->getOperand(UseIdx).setReg(ZeroReg);
if (DeleteDef)
DefMI->eraseFromParent();
return true;
}
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I)
if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
return true;
return false;
}
// We should make sure that, if we're going to predicate both sides of a
// condition (a diamond), that both sides don't define the counter register. We
// can predicate counter-decrement-based branches, but while that predicates
// the branching, it does not predicate the counter decrement. If we tried to
// merge the triangle into one predicated block, we'd decrement the counter
// twice.
bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
const BranchProbability &Probability) const {
return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
}
bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
// The predicated branches are identified by their type, not really by the
// explicit presence of a predicate. Furthermore, some of them can be
// predicated more than once. Because if-conversion won't try to predicate
// any instruction which already claims to be predicated (by returning true
// here), we always return false. In doing so, we let isPredicable() be the
// final word on whether or not the instruction can be (further) predicated.
return false;
}
bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator())
return false;
// Conditional branch is a special case.
if (MI->isBranch() && !MI->isBarrier())
return true;
return !isPredicated(MI);
}
bool PPCInstrInfo::PredicateInstruction(
MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
} else {
MI->setDesc(get(PPC::BCLR));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
}
return true;
} else if (OpC == PPC::B) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
} else {
MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
MI->RemoveOperand(0);
MI->setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg())
.addMBB(MBB);
}
return true;
} else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
(setLR ? PPC::BCCTRL : PPC::BCCTR)));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
return true;
}
return false;
}
bool PPCInstrInfo::SubsumesPredicate(
const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const {
assert(Pred1.size() == 2 && "Invalid PPC first predicate");
assert(Pred2.size() == 2 && "Invalid PPC second predicate");
if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
return false;
if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
return false;
// P1 can only subsume P2 if they test the same condition register.
if (Pred1[1].getReg() != Pred2[1].getReg())
return false;
PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
if (P1 == P2)
return true;
// Does P1 subsume P2, e.g. GE subsumes GT.
if (P1 == PPC::PRED_LE &&
(P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
return true;
if (P1 == PPC::PRED_GE &&
(P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
return true;
return false;
}
bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// Note: At the present time, the contents of Pred from this function are
// unused by IfConversion. This implementation follows ARM by pushing the
// CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
// predicate, instructions defining CTR or CTR8 are also included as
// predicate-defining instructions.
const TargetRegisterClass *RCs[] =
{ &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
&PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
bool Found = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
const TargetRegisterClass *RC = RCs[c];
if (MO.isReg()) {
if (MO.isDef() && RC->contains(MO.getReg())) {
Pred.push_back(MO);
Found = true;
}
} else if (MO.isRegMask()) {
for (TargetRegisterClass::iterator I = RC->begin(),
IE = RC->end(); I != IE; ++I)
if (MO.clobbersPhysReg(*I)) {
Pred.push_back(MO);
Found = true;
}
}
}
}
return Found;
}
bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
unsigned OpC = MI->getOpcode();
switch (OpC) {
default:
return false;
case PPC::B:
case PPC::BLR:
case PPC::BCTR:
case PPC::BCTR8:
case PPC::BCTRL:
case PPC::BCTRL8:
return true;
}
}
bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
unsigned &SrcReg, unsigned &SrcReg2,
int &Mask, int &Value) const {
unsigned Opc = MI->getOpcode();
switch (Opc) {
default: return false;
case PPC::CMPWI:
case PPC::CMPLWI:
case PPC::CMPDI:
case PPC::CMPLDI:
SrcReg = MI->getOperand(1).getReg();
SrcReg2 = 0;
Value = MI->getOperand(2).getImm();
Mask = 0xFFFF;
return true;
case PPC::CMPW:
case PPC::CMPLW:
case PPC::CMPD:
case PPC::CMPLD:
case PPC::FCMPUS:
case PPC::FCMPUD:
SrcReg = MI->getOperand(1).getReg();
SrcReg2 = MI->getOperand(2).getReg();
return true;
}
}
bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
unsigned SrcReg, unsigned SrcReg2,
int Mask, int Value,
const MachineRegisterInfo *MRI) const {
if (DisableCmpOpt)
return false;
int OpC = CmpInstr->getOpcode();
unsigned CRReg = CmpInstr->getOperand(0).getReg();
// FP record forms set CR1 based on the exception status bits, not a
// comparison with zero.
if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
return false;
// The record forms set the condition register based on a signed comparison
// with zero (so says the ISA manual). This is not as straightforward as it
// seems, however, because this is always a 64-bit comparison on PPC64, even
// for instructions that are 32-bit in nature (like slw for example).
// So, on PPC32, for unsigned comparisons, we can use the record forms only
// for equality checks (as those don't depend on the sign). On PPC64,
// we are restricted to equality for unsigned 64-bit comparisons and for
// signed 32-bit comparisons the applicability is more restricted.
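// For example, sraw sign-extends its result on PPC64, so its record form
// can replace a signed 32-bit comparison outright, while slw zero-extends
// and its record form is therefore usable only for equality checks.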
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
int MIOpC = MI->getOpcode();
bool equalityOnly = false;
bool noSub = false;
if (isPPC64) {
if (is32BitSignedCompare) {
// We can perform this optimization only if MI is sign-extending.
if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo ||
MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
noSub = true;
} else
return false;
} else if (is32BitUnsignedCompare) {
// We can perform this optimization, equality only, if MI is
// zero-extending.
if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
MIOpC == PPC::SLW || MIOpC == PPC::SLWo ||
MIOpC == PPC::SRW || MIOpC == PPC::SRWo) {
noSub = true;
equalityOnly = true;
} else
return false;
} else
equalityOnly = is64BitUnsignedCompare;
} else
equalityOnly = is32BitUnsignedCompare;
if (equalityOnly) {
// We need to check the uses of the condition register in order to reject
// non-equality comparisons.
for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
IE = MRI->use_end(); I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
unsigned Pred = UseMI->getOperand(0).getImm();
if (Pred != PPC::PRED_EQ && Pred != PPC::PRED_NE)
return false;
} else if (UseMI->getOpcode() == PPC::ISEL ||
UseMI->getOpcode() == PPC::ISEL8) {
unsigned SubIdx = UseMI->getOperand(3).getSubReg();
if (SubIdx != PPC::sub_eq)
return false;
} else
return false;
}
}
MachineBasicBlock::iterator I = CmpInstr;
// Scan forward to find the first use of the compare.
for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
I != EL; ++I) {
bool FoundUse = false;
for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
JE = MRI->use_end(); J != JE; ++J)
if (&*J == &*I) {
FoundUse = true;
break;
}
if (FoundUse)
break;
}
// There are two possible candidates which can be changed to set CR[01].
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
MachineInstr *Sub = NULL;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = NULL;
// FIXME: Conservatively refuse to convert an instruction which isn't in the
// same BB as the comparison. This is to allow the check below to avoid calls
// (and other explicit clobbers); instead we should really check for these
// more explicitly (in at least a few predecessors).
else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
// PPC does not have a record-form SUBri.
return false;
}
// Search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
// Get ready to iterate backward from CmpInstr.
MachineBasicBlock::iterator E = MI,
B = CmpInstr->getParent()->begin();
for (; I != E && !noSub; --I) {
const MachineInstr &Instr = *I;
unsigned IOpC = Instr.getOpcode();
if (&*I != CmpInstr && (
Instr.modifiesRegister(PPC::CR0, TRI) ||
Instr.readsRegister(PPC::CR0, TRI)))
// This instruction modifies or uses the record condition register after
// the one we want to change. While we could do this transformation, it
// would likely not be profitable. This transformation removes one
// instruction, and so even forcing RA to generate one move probably
// makes it unprofitable.
return false;
// Check whether CmpInstr can be made redundant by the current instruction.
if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
(IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
((Instr.getOperand(1).getReg() == SrcReg &&
Instr.getOperand(2).getReg() == SrcReg2) ||
(Instr.getOperand(1).getReg() == SrcReg2 &&
Instr.getOperand(2).getReg() == SrcReg))) {
Sub = &*I;
break;
}
if (I == B)
// We reached the beginning of the block without finding a candidate sub.
return false;
}
// Return false if no candidates exist.
if (!MI && !Sub)
return false;
// The single candidate is called MI.
if (!MI) MI = Sub;
int NewOpC = -1;
MIOpC = MI->getOpcode();
if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
NewOpC = MIOpC;
}
// FIXME: On the non-embedded POWER architectures, only some of the record
// forms are fast, and we should use only the fast ones.
// The defining instruction has a record form (or is already a record
// form). It is possible, however, that we'll need to reverse the condition
// code of the users.
if (NewOpC == -1)
return false;
SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
// needs to be updated to be based on SUB. Push the condition code
// operands to PredsToUpdate (and CR subregister indices to SubRegsToUpdate).
// If it is safe to remove CmpInstr, the condition code of these operands
// will be modified.
bool ShouldSwap = false;
if (Sub) {
ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg;
// The operands to subf are the opposite of those to sub, so in the
// fixed-point case the order must always be inverted.
ShouldSwap = !ShouldSwap;
}
if (ShouldSwap)
for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
IE = MRI->use_end(); I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
assert((!equalityOnly ||
Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
"Invalid predicate for equality-only optimization");
PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
PPC::getSwappedPredicate(Pred)));
} else if (UseMI->getOpcode() == PPC::ISEL ||
UseMI->getOpcode() == PPC::ISEL8) {
unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
"Invalid CR bit for equality-only optimization");
if (NewSubReg == PPC::sub_lt)
NewSubReg = PPC::sub_gt;
else if (NewSubReg == PPC::sub_gt)
NewSubReg = PPC::sub_lt;
SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
NewSubReg));
} else // We need to abort on a user we don't understand.
return false;
}
// Copy the value of CR0 (as set by the record-form instruction) into the
// register that the compare defined. If the instruction was not previously
// in record form, set the kill flag on CR0.
CmpInstr->eraseFromParent();
MachineBasicBlock::iterator MII = MI;
BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
get(TargetOpcode::COPY), CRReg)
.addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
if (MIOpC != NewOpC) {
// We need to be careful here: we're replacing one instruction with
// another, and we need to make sure that we get all of the right
// implicit uses and defs. On the other hand, the caller may be holding
// an iterator to this instruction, and so we can't delete it (this is
// specifically the case if this is the instruction directly after the
// compare).
const MCInstrDesc &NewDesc = get(NewOpC);
MI->setDesc(NewDesc);
if (NewDesc.ImplicitDefs)
for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
*ImpDefs; ++ImpDefs)
if (!MI->definesRegister(*ImpDefs))
MI->addOperand(*MI->getParent()->getParent(),
MachineOperand::CreateReg(*ImpDefs, true, true));
if (NewDesc.ImplicitUses)
for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
*ImpUses; ++ImpUses)
if (!MI->readsRegister(*ImpUses))
MI->addOperand(*MI->getParent()->getParent(),
MachineOperand::CreateReg(*ImpUses, false, true));
}
// Modify the condition codes in PredsToUpdate and the CR subregisters in
// SubRegsToUpdate. Since we have SUB(r1, r2) and CMP(r2, r1), the condition
// code needs to be changed from r2 > r1 to r1 < r2, from r2 < r1 to
// r1 > r2, etc.
for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
return true;
}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
case PPC::INLINEASM: { // Inline Asm: Variable size.
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
case PPC::PROLOG_LABEL:
case PPC::EH_LABEL:
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
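// The bl is followed by a reserved nop slot (used by the linker for the
// TOC restore after the call), so these occupy 8 bytes.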
case PPC::BL8_NOP:
case PPC::BLA8_NOP:
return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
}
}
#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns");
STATISTIC(NumBLR, "Number of early returns");
namespace llvm {
void initializePPCEarlyReturnPass(PassRegistry&);
}
namespace {
// PPCEarlyReturn pass - For simple functions without epilogue code, move
// returns up, and create conditional returns, to avoid unnecessary
// branch-to-blr sequences.
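// For example, an unconditional branch to a blr-only block becomes a blr,
// a conditional branch to it becomes a bclr, and the return block itself is
// erased once it has no remaining predecessors.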
struct PPCEarlyReturn : public MachineFunctionPass {
static char ID;
PPCEarlyReturn() : MachineFunctionPass(ID) {
initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
}
const PPCTargetMachine *TM;
const PPCInstrInfo *TII;
protected:
bool processBlock(MachineBasicBlock &ReturnMBB) {
bool Changed = false;
MachineBasicBlock::iterator I = ReturnMBB.begin();
I = ReturnMBB.SkipPHIsAndLabels(I);
// The block must be essentially empty except for the blr.
if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR ||
I != ReturnMBB.getLastNonDebugInstr())
return Changed;
SmallVector<MachineBasicBlock*, 8> PredToRemove;
for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
bool OtherReference = false, BlockChanged = false;
for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
if (J->getOpcode() == PPC::B) {
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
++NumBLR;
continue;
}
} else if (J->getOpcode() == PPC::BCC) {
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
.addImm(J->getOperand(0).getImm())
.addReg(J->getOperand(1).getReg());
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
++NumBCLR;
continue;
}
} else if (J->isBranch()) {
if (J->isIndirectBranch()) {
if (ReturnMBB.hasAddressTaken())
OtherReference = true;
} else
for (unsigned i = 0; i < J->getNumOperands(); ++i)
if (J->getOperand(i).isMBB() &&
J->getOperand(i).getMBB() == &ReturnMBB)
OtherReference = true;
} else if (!J->isTerminator() && !J->isDebugValue())
break;
if (J == (*PI)->begin())
break;
--J;
}
if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
OtherReference = true;
// Predecessors are stored in a vector and can't be removed here.
if (!OtherReference && BlockChanged) {
PredToRemove.push_back(*PI);
}
if (BlockChanged)
Changed = true;
}
for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
PredToRemove[i]->removeSuccessor(&ReturnMBB);
if (Changed && !ReturnMBB.hasAddressTaken()) {
// We now might be able to merge this blr-only block into its
// by-layout predecessor.
if (ReturnMBB.pred_size() == 1 &&
(*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
// Move the blr into the preceding block.
MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
PrevMBB.removeSuccessor(&ReturnMBB);
}
if (ReturnMBB.pred_empty())
ReturnMBB.eraseFromParent();
}
return Changed;
}
public:
virtual bool runOnMachineFunction(MachineFunction &MF) {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getInstrInfo();
bool Changed = false;
// If the function does not have at least two blocks, then there is
// nothing to do.
if (MF.size() < 2)
return Changed;
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
MachineBasicBlock &B = *I++;
if (processBlock(B))
Changed = true;
}
return Changed;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
}
INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
"PowerPC Early-Return Creation", false, false)
char PPCEarlyReturn::ID = 0;
FunctionPass*
llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
Index: head/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td (revision 265925)
@@ -1,2709 +1,2720 @@
//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the subset of the 32-bit PowerPC instruction set, as used
// by the PowerPC instruction selector.
//
//===----------------------------------------------------------------------===//
include "PPCInstrFormats.td"
//===----------------------------------------------------------------------===//
// PowerPC specific type constraints.
//
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def SDT_PPCvperm : SDTypeProfile<1, 3, [
SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
]>;
def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;
def SDT_PPCcondbr : SDTypeProfile<0, 3, [
SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClbrx : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def SDT_PPCstcx : SDTypeProfile<0, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad]>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
[SDNPMayLoad]>;
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
[SDNPHasChain]>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
// amounts. These nodes are generated by the multi-precision shift code.
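// Unlike the target-independent shift nodes, amounts in the range 32-63 are
// well-defined here (they produce zero, or all sign bits for sra), matching
// the hardware slw/srw/sraw behavior.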
def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPSideEffect,
SDNPInGlue, SDNPOutGlue]>;
def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
[SDNPHasChain, SDNPSideEffect,
SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
SDTypeProfile<1, 1, [SDTCisInt<0>,
SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect]>;
def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPSideEffect]>;
def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc,
[SDNPHasChain, SDNPSideEffect]>;
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
// Instructions to set/unset CR bit 6 for SVR4 vararg calls
def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
// Instructions to support medium and large code model
def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
//
def SHL32 : SDNodeXForm<imm, [{
// Transformation function: 31 - imm
return getI32Imm(31 - N->getZExtValue());
}]>;
def SRL32 : SDNodeXForm<imm, [{
// Transformation function: 32 - imm
return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
}]>;
def LO16 : SDNodeXForm<imm, [{
// Transformation function: get the low 16 bits.
return getI32Imm((unsigned short)N->getZExtValue());
}]>;
def HI16 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
return getI32Imm((unsigned)N->getZExtValue() >> 16);
}]>;
def HA16 : SDNodeXForm<imm, [{
// Transformation function: get the high-adjusted upper 16 bits.
signed int Val = N->getZExtValue();
return getI32Imm((Val - (signed short)Val) >> 16);
}]>;
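// Subtracting the sign-extended low half adds 0x10000 whenever bit 15 of
// Val is set, so (HA16(Val) << 16) + (signed short)LO16(Val) == Val.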
def MB : SDNodeXForm<imm, [{
// Transformation function: get the start bit of a mask
unsigned mb = 0, me;
(void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
return getI32Imm(mb);
}]>;
def ME : SDNodeXForm<imm, [{
// Transformation function: get the end bit of a mask
unsigned mb, me = 0;
(void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
return getI32Imm(me);
}]>;
def maskimm32 : PatLeaf<(imm), [{
// maskImm predicate - True if immediate is a run of ones.
unsigned mb, me;
if (N->getValueType(0) == MVT::i32)
return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
else
return false;
}]>;
def imm32SExt16 : Operand<i32>, ImmLeaf<i32, [{
// imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit
// sign extended field. Used by instructions like 'addi'.
return (int32_t)Imm == (short)Imm;
}]>;
def imm64SExt16 : Operand<i64>, ImmLeaf<i64, [{
// imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit
// sign extended field. Used by instructions like 'addi'.
return (int64_t)Imm == (short)Imm;
}]>;
def immZExt16 : PatLeaf<(imm), [{
// immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
// field. Used by instructions like 'ori'.
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
// identical in 32-bit mode, but in 64-bit mode, they return true if the
// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
// clear).
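// For example, 0x80000000 satisfies imm16ShiftedZExt in both modes, but
// satisfies imm16ShiftedSExt only in 32-bit mode.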
def imm16ShiftedZExt : PatLeaf<(imm), [{
// imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
// immediate are set. Used by instructions like 'xoris'.
return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
}], HI16>;
def imm16ShiftedSExt : PatLeaf<(imm), [{
// imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
// immediate are set. Used by instructions like 'addis'. Identical to
// imm16ShiftedZExt in 32-bit mode.
if (N->getZExtValue() & 0xFFFF) return false;
if (N->getValueType(0) == MVT::i32)
return true;
// For 64-bit, make sure it is sext right.
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
// restricted memrix (4-aligned) offsets are alignment sensitive. If these
// offsets are hidden behind TOC entries, then the values of the lower-order
// bits cannot be checked directly. As a result, we need to also incorporate
// an alignment check into the relevant patterns.
def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
def aligned4store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
def aligned4pre_store : PatFrag<
(ops node:$val, node:$base, node:$offset),
(pre_store node:$val, node:$base, node:$offset), [{
return cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
class isPPC64 { bit PPC64 = 1; }
class isDOT { bit RC = 1; }
class RegConstraint<string C> {
string Constraints = C;
}
class NoEncode<string E> {
string DisableEncoding = E;
}
//===----------------------------------------------------------------------===//
// PowerPC Operand Definitions.
// In the default PowerPC assembler syntax, registers are specified simply
// by number, so they cannot be distinguished from immediate values (without
// looking at the opcode). This means that the default operand matching logic
// for the asm parser does not work, and we need to specify custom matchers.
// Since those can only be specified with RegisterOperand classes and not
// directly on the RegisterClass, all instruction patterns used by the asm
// parser need to use a RegisterOperand (instead of a RegisterClass) for
// all their register operands.
// For this purpose, we define one RegisterOperand for each RegisterClass,
// using the same name as the class, just in lower case.
def PPCRegGPRCAsmOperand : AsmOperandClass {
let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
}
def gprc : RegisterOperand<GPRC> {
let ParserMatchClass = PPCRegGPRCAsmOperand;
}
def PPCRegG8RCAsmOperand : AsmOperandClass {
let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
}
def g8rc : RegisterOperand<G8RC> {
let ParserMatchClass = PPCRegG8RCAsmOperand;
}
def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
}
def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
}
def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
}
def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
}
def PPCRegF8RCAsmOperand : AsmOperandClass {
let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
}
def f8rc : RegisterOperand<F8RC> {
let ParserMatchClass = PPCRegF8RCAsmOperand;
}
def PPCRegF4RCAsmOperand : AsmOperandClass {
let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
}
def f4rc : RegisterOperand<F4RC> {
let ParserMatchClass = PPCRegF4RCAsmOperand;
}
def PPCRegVRRCAsmOperand : AsmOperandClass {
let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
}
def vrrc : RegisterOperand<VRRC> {
let ParserMatchClass = PPCRegVRRCAsmOperand;
}
def PPCRegCRBITRCAsmOperand : AsmOperandClass {
let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
}
def crbitrc : RegisterOperand<CRBITRC> {
let ParserMatchClass = PPCRegCRBITRCAsmOperand;
}
def PPCRegCRRCAsmOperand : AsmOperandClass {
let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
}
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
}
def s5imm : Operand<i32> {
let PrintMethod = "printS5ImmOperand";
let ParserMatchClass = PPCS5ImmAsmOperand;
}
def PPCU5ImmAsmOperand : AsmOperandClass {
let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
let RenderMethod = "addImmOperands";
}
def u5imm : Operand<i32> {
let PrintMethod = "printU5ImmOperand";
let ParserMatchClass = PPCU5ImmAsmOperand;
}
def PPCU6ImmAsmOperand : AsmOperandClass {
let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
let RenderMethod = "addImmOperands";
}
def u6imm : Operand<i32> {
let PrintMethod = "printU6ImmOperand";
let ParserMatchClass = PPCU6ImmAsmOperand;
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addImmOperands";
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
let RenderMethod = "addImmOperands";
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
let RenderMethod = "addImmOperands";
}
def s17imm : Operand<i32> {
// This operand type is used for addis/lis to allow the assembler parser
// to accept immediates in the range -65536..65535 for compatibility with
// the GNU assembler. The operand is treated as 16-bit otherwise.
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
}
def PPCDirectBrAsmOperand : AsmOperandClass {
let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
let RenderMethod = "addBranchTargetOperands";
}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
let ParserMatchClass = PPCDirectBrAsmOperand;
}
def absdirectbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
let EncoderMethod = "getAbsDirectBrEncoding";
let ParserMatchClass = PPCDirectBrAsmOperand;
}
def PPCCondBrAsmOperand : AsmOperandClass {
let Name = "CondBr"; let PredicateMethod = "isCondBr";
let RenderMethod = "addBranchTargetOperands";
}
def condbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getCondBrEncoding";
let ParserMatchClass = PPCCondBrAsmOperand;
}
def abscondbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
let EncoderMethod = "getAbsCondBrEncoding";
let ParserMatchClass = PPCCondBrAsmOperand;
}
def calltarget : Operand<iPTR> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
let ParserMatchClass = PPCDirectBrAsmOperand;
}
def abscalltarget : Operand<iPTR> {
let PrintMethod = "printAbsBranchOperand";
let EncoderMethod = "getAbsDirectBrEncoding";
let ParserMatchClass = PPCDirectBrAsmOperand;
}
def PPCCRBitMaskOperand : AsmOperandClass {
let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
}
def crbitm: Operand<i8> {
let PrintMethod = "printcrbitm";
let EncoderMethod = "get_crbitm_encoding";
let ParserMatchClass = PPCCRBitMaskOperand;
}
// Address operands
// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
def PPCRegGxRCNoR0Operand : AsmOperandClass {
let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
}
def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
let ParserMatchClass = PPCRegGxRCNoR0Operand;
}
// A version of ptr_rc usable with the asm parser.
def PPCRegGxRCOperand : AsmOperandClass {
let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
}
def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
let ParserMatchClass = PPCRegGxRCOperand;
}
def PPCDispRIOperand : AsmOperandClass {
let Name = "DispRI"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addImmOperands";
}
def dispRI : Operand<iPTR> {
let ParserMatchClass = PPCDispRIOperand;
}
def PPCDispRIXOperand : AsmOperandClass {
let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
let RenderMethod = "addImmOperands";
}
def dispRIX : Operand<iPTR> {
let ParserMatchClass = PPCDispRIXOperand;
}
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
}
def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
// A single-register address. This is used with the SjLj
// pseudo-instructions.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc:$ptrreg);
}
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
let PrintMethod = "printPredicateOperand";
let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
}
// Define PowerPC specific addressing mode.
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.
def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
let Defs = [CARRY] in
def NAME : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
def o : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XForm_10<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : XForm_10<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
let Defs = [CARRY] in
def NAME : XForm_10<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
def o : XForm_10<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XForm_11<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : XForm_11<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
let Defs = [CARRY] in
def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
def o : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
let Defs = [CARRY] in
def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
def o : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : MForm_2<opcode, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : MForm_2<opcode, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : MDForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : MDForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : MDSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
def o : MDSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
let Defs = [CARRY] in
def NAME : XSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
def o : XSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XForm_26<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
def o : XForm_26<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XForm_28<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
def o : XForm_28<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : AForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
def o : AForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : AForm_2<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
def o : AForm_2<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : AForm_3<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
def o : AForm_3<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT, RecFormRel;
}
}
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
// Pseudo-instructions:
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
"UPDATE_VRSAVE $rD, $rS", []>;
}
let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
// Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
// because either operand might become the first operand in an isel, and
// that operand cannot be r0.
def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
gprc_nor0:$T, gprc_nor0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
}
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
let mayStore = 1 in
def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
[(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
let isCodeGenOnly = 1 in
def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
"b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>;
}
}
let Defs = [LR] in
def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
"b $dst", BrB,
[(br bb:$dst)]>;
def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
"ba $dst", BrB, []>;
}
// BCC represents an arbitrary conditional branch on a predicate.
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in {
def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}${cond:pm} ${cond:reg}, $dst"
/*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
let isReturn = 1, Uses = [LR, RM] in
def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
"b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>;
}
let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
"bdzlr", BrB, []>;
def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
"bdnzlr", BrB, []>;
def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins),
"bdzlr+", BrB, []>;
def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins),
"bdnzlr+", BrB, []>;
def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins),
"bdzlr-", BrB, []>;
def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins),
"bdnzlr-", BrB, []>;
}
let Defs = [CTR], Uses = [CTR] in {
def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
"bdz $dst">;
def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz $dst">;
def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst),
"bdza $dst">;
def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst),
"bdnza $dst">;
def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst),
"bdz+ $dst">;
def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz+ $dst">;
def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst),
"bdza+ $dst">;
def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst),
"bdnza+ $dst">;
def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst),
"bdz- $dst">;
def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz- $dst">;
def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst),
"bdza- $dst">;
def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst),
"bdnza- $dst">;
}
}
// The unconditional BCL used by the SjLj setjmp code.
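// BO = 20, BI = 31 is the branch-always encoding; the link bit makes LR
// receive the address of the next instruction, which SjLj then captures.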
let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
let Defs = [LR], Uses = [RM] in {
def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
"bcl 20, 31, $dst">;
}
}
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
"bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
let isCodeGenOnly = 1 in {
def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
}
}
let Uses = [CTR, RM] in {
def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
"bctrl", BrB, [(PPCbctrl)]>,
Requires<[In32BitMode]>;
let isCodeGenOnly = 1 in
def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
"b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>;
}
let Uses = [LR, RM] in {
def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins),
"blrl", BrB, []>;
let isCodeGenOnly = 1 in
def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
"b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>;
}
let Defs = [CTR], Uses = [CTR, RM] in {
def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
"bdzl $dst">;
def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst),
"bdnzl $dst">;
def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst),
"bdzla $dst">;
def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst),
"bdnzla $dst">;
def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst),
"bdzl+ $dst">;
def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst),
"bdnzl+ $dst">;
def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst),
"bdzla+ $dst">;
def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst),
"bdnzla+ $dst">;
def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst),
"bdzl- $dst">;
def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst),
"bdnzl- $dst">;
def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst),
"bdzla- $dst">;
def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst),
"bdnzla- $dst">;
}
let Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
"bdzlrl", BrB, []>;
def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins),
"bdnzlrl", BrB, []>;
def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins),
"bdzlrl+", BrB, []>;
def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins),
"bdnzlrl+", BrB, []>;
def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins),
"bdzlrl-", BrB, []>;
def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins),
"bdnzlrl-", BrB, []>;
}
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
Requires<[In32BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
"b $dst", BrB,
[]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
"ba $dst", BrB,
[]>;
}
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
let Defs = [CTR] in
def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
let isTerminator = 1 in
def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP32",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In32BitMode]>;
}
let isBranch = 1, isTerminator = 1 in {
def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
"#EH_SjLj_Setup\t$dst", []>;
}
// System call.
let PPC970_Unit = 7 in {
def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
"sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>;
}
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
"dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
"dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
"dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
"dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
"dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
"dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
"dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
PPC970_DGroup_Single;
def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
"dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
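// Map a read prefetch of the data cache (rw = 0, cache type = 1) onto dcbt;
// the locality hint (the bare imm) is accepted but ignored.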
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
(DCBT xoaddr:$dst)>;
// Atomic operations
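// Each pseudo below is expanded by the custom inserter into an lwarx/stwcx.
// retry loop; CR0 is listed as a def because stwcx. records its outcome there.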
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
[(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
[(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
[(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
[(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
[(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
[(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
[(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
[(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
[(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
[(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
[(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
[(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
[(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
[(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
[(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
[(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
[(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
[(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
[(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
[(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
[(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
[(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
[(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
(outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
[(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
}
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
"lwarx $rD, $src", LdStLWARX,
[(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
"stwcx. $rS, $dst", LdStSTWCX,
[(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm),
"twi $to, $rA, $imm", IntTrapW, []>;
def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB),
"tw $to, $rA, $rB", IntTrapW, []>;
def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm),
"tdi $to, $rA, $imm", IntTrapD, []>;
def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
"td $to, $rA, $rB", IntTrapD, []>;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
//
// Unindexed (r+i) Loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
[(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
[(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
"lfs $rD, $src", LdStLFD,
[(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
[(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
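// These also produce the updated effective address: RegConstraint ties
// $ea_result to the incoming base register and NoEncode keeps the tied
// operand out of the instruction encoding.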
let mayLoad = 1, neverHasSideEffects = 1 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
// Indexed (r+r) Loads.
//
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
[(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFD,
[(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
[(set f64:$frD, (load xaddr:$src))]>;
def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
"lfiwax $frD, $src", LdStLFD,
[(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
"lfiwzx $frD, $src", LdStLFD,
[(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
// Load Multiple
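// lmw loads registers rD through r31 from consecutive words starting at the
// effective address; no ISel pattern is provided.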
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
"lmw $rD, $src", LdStLMW, []>;
//===----------------------------------------------------------------------===//
// PPC32 Store Instructions.
//
// Unindexed (r+i) Stores.
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
"stb $rS, $src", LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
"sth $rS, $src", LdStStore,
[(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
"stw $rS, $src", LdStStore,
[(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", LdStSTFD,
[(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
"stfd $rS, $dst", LdStSTFD,
[(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stbu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"sthu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stwu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
"stfsu $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
"stfdu $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
// Patterns to match the pre-inc stores. We can't put the patterns on
// the instruction definitions directly as ISel wants the address base
// and offset to be separate operands, not a single complex operand.
def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STBU $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STHU $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STWU $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
// Indexed (r+r) Stores.
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
[(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
[(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
[(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
"stfiwx $frS, $dst", LdStSTFD,
[(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
"stfsx $frS, $dst", LdStSTFD,
[(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
"stfdx $frS, $dst", LdStSTFD,
[(store f64:$frS, xaddr:$dst)]>;
}
// Indexed (r+r) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stbux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"sthux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stwux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
"stfsux $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
"stfdux $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
}
// Patterns to match the pre-inc stores. We can't put the patterns on
// the instruction definitions directly as ISel wants the address base
// and offset to be separate operands, not a single complex operand.
def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
(STBUX $rS, $ptrreg, $ptroff)>;
def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
(STHUX $rS, $ptrreg, $ptroff)>;
def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
(STWUX $rS, $ptrreg, $ptroff)>;
def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
(STFSUX $rS, $ptrreg, $ptroff)>;
def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
(STFDUX $rS, $ptrreg, $ptroff)>;
// Store Multiple
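// stmw stores registers rS through r31 to consecutive words starting at the
// effective address; no ISel pattern is provided.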
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", LdStSync, []>;
-def : Pat<(int_ppc_sync), (SYNC 0)>;
+ "sync $L", LdStSync, []>, Requires<[IsNotBookE]>;
+let isCodeGenOnly = 1 in {
+ def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "msync", LdStSync, []>, Requires<[IsBookE]> {
+ let L = 0;
+ }
+}
+
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
//
let PPC970_Unit = 1 in { // FXU Operations.
def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
"addi $rD, $rA, $imm", IntSimple,
[(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
let BaseName = "addic" in {
let Defs = [CARRY] in
def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
[(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>, isDOT, RecFormRel;
}
def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
"addis $rD, $rA, $imm", IntSimple,
[(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
let isCodeGenOnly = 1 in
def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
"la $rD, $sym($rA)", IntGeneral,
[(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
[(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
let Defs = [CARRY] in
def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
[(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
"li $rD, $imm", IntSimple,
[(set i32:$rD, imm32SExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
"lis $rD, $imm", IntSimple,
[(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
[(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
[(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
}
def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
"cmpwi $crD, $rA, $imm", IntCompare>;
def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
"cmplwi $dst, $src1, $src2", IntCompare>;
}
}
let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"nand", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"and", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (and i32:$rS, i32:$rB))]>;
defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"andc", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"or", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (or i32:$rS, i32:$rB))]>;
defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"nor", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"orc", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"eqv", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"xor", "$rA, $rS, $rB", IntSimple,
[(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"slw", "$rA, $rS, $rB", IntGeneral,
[(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"srw", "$rA, $rS, $rB", IntGeneral,
[(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"sraw", "$rA, $rS, $rB", IntShift,
[(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
let PPC970_Unit = 1 in { // FXU Operations.
let neverHasSideEffects = 1 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
"srawi", "$rA, $rS, $SH", IntShift,
[(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
"cntlzw", "$rA, $rS", IntGeneral,
[(set i32:$rA, (ctlz i32:$rS))]>;
defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
"extsb", "$rA, $rS", IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
"extsh", "$rA, $rS", IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
}
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
"cmpw $crD, $rA, $rB", IntCompare>;
def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
"cmplw $crD, $rA, $rB", IntCompare>;
}
}
let PPC970_Unit = 3 in { // FPU Operations.
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
// "fcmpo $crD, $fA, $fB", FPCompare>;
let isCompare = 1, neverHasSideEffects = 1 in {
def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
"fcmpu $crD, $fA, $fB", FPCompare>;
def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
"fcmpu $crD, $fA, $fB", FPCompare>;
}
let Uses = [RM] in {
let neverHasSideEffects = 1 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", FPGeneral,
[]>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
"frsp", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fround f64:$frB))]>;
let Interpretation64Bit = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", FPGeneral,
[(set f64:$frD, (frnd f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", FPGeneral,
[(set f32:$frD, (frnd f32:$frB))]>;
}
let neverHasSideEffects = 1 in {
let Interpretation64Bit = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
"frip", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fceil f64:$frB))]>;
defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
"frip", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fceil f32:$frB))]>;
let Interpretation64Bit = 1 in
defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
"friz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (ftrunc f64:$frB))]>;
defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
"friz", "$frD, $frB", FPGeneral,
[(set f32:$frD, (ftrunc f32:$frB))]>;
let Interpretation64Bit = 1 in
defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
"frim", "$frD, $frB", FPGeneral,
[(set f64:$frD, (ffloor f64:$frB))]>;
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
"frim", "$frD, $frB", FPGeneral,
[(set f32:$frD, (ffloor f32:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
"fsqrt", "$frD, $frB", FPSqrt,
[(set f64:$frD, (fsqrt f64:$frB))]>;
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
"fsqrts", "$frD, $frB", FPSqrt,
[(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
}
/// Note that FMR is defined as a pseudo-op on the PPC970 because such copies
/// are often coalesced away, and we don't want the dispatch group builder to
/// think that they will fill slots (which could cause the load of an LSU
/// reject to sneak into a d-group with a store).
let neverHasSideEffects = 1 in
defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
"fmr", "$frD, $frB", FPGeneral,
[]>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
"fabs", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fabs f32:$frB))]>;
let Interpretation64Bit = 1 in
defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
"fabs", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fabs f64:$frB))]>;
defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
"fnabs", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fneg (fabs f32:$frB)))]>;
let Interpretation64Bit = 1 in
defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
"fnabs", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fneg (fabs f64:$frB)))]>;
defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
"fneg", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fneg f32:$frB))]>;
let Interpretation64Bit = 1 in
defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
"fneg", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
"fcpsgn", "$frD, $frA, $frB", FPGeneral,
[(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
let Interpretation64Bit = 1 in
defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
"fcpsgn", "$frD, $frA, $frB", FPGeneral,
[(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
// Reciprocal estimates.
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
"fre", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfre f64:$frB))]>;
defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
"fres", "$frD, $frB", FPGeneral,
[(set f32:$frD, (PPCfre f32:$frB))]>;
defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
"frsqrte", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
"frsqrtes", "$frD, $frB", FPGeneral,
[(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
// XL-Form instructions. Condition register logical ops.
//
let neverHasSideEffects = 1 in
def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
"mcrf $BF, $BFA", BrMCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"crand $CRD, $CRA, $CRB", BrCR, []>;
def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"crnand $CRD, $CRA, $CRB", BrCR, []>;
def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"cror $CRD, $CRA, $CRB", BrCR, []>;
def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"crxor $CRD, $CRA, $CRB", BrCR, []>;
def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"crnor $CRD, $CRA, $CRB", BrCR, []>;
def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"creqv $CRD, $CRA, $CRB", BrCR, []>;
def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"crandc $CRD, $CRA, $CRB", BrCR, []>;
def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
"crorc $CRD, $CRA, $CRB", BrCR, []>;
let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
"crxor $dst, $dst, $dst", BrCR,
[]>;
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
"creqv 6, 6, 6", BrCR,
[(PPCcr6set)]>;
def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
"crxor 6, 6, 6", BrCR,
[(PPCcr6unset)]>;
}
}
// XFX-Form instructions. Instructions that deal with SPRs.
//
def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
"mfspr $RT, $SPR", SprMFSPR>;
def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
"mtspr $SPR, $RT", SprMTSPR>;
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
"mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>;
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
let Pattern = [(int_ppc_mtctr i32:$rS)] in
def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
"mtlr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR] in {
def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
"mflr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let isCodeGenOnly = 1 in {
// Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed
// like a GPR on the PPC970. As such, copies in and out have the same
// performance characteristics as an OR instruction.
def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
"mtspr 256, $rS", IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
(outs VRSAVERC:$reg), (ins gprc:$rS),
"mtspr 256, $rS", IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
(ins VRSAVERC:$reg),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
// so we'll need to scavenge a register for it.
let mayStore = 1 in
def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
"#SPILL_VRSAVE", []>;
// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let neverHasSideEffects = 1 in {
def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
"mtocrf $FXM, $ST", BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // neverHasSideEffects = 1
// Pseudo instruction to perform FADD in round-to-zero mode.
let usesCustomInserter = 1, Uses = [RM] in {
def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
[(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
}
// The above pseudo gets expanded to make use of the following instructions
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
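// mtfsb0/mtfsb1 clear or set a single FPSCR bit; mtfsf writes whole FPSCR
// fields under the $FM field mask.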
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
"mtfsb0 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
"mtfsb1 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
"mtfsf $FM, $rT", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
"mffs $rT", IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
//
defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"add", "$rT, $rA, $rB", IntSimple,
[(set i32:$rT, (add i32:$rA, i32:$rB))]>;
defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"addc", "$rT, $rA, $rB", IntGeneral,
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"divw", "$rT, $rA, $rB", IntDivW,
[(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"divwu", "$rT, $rA, $rB", IntDivW,
[(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IntMulHW,
[(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhwu", "$rT, $rA, $rB", IntMulHWU,
[(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mullw", "$rT, $rA, $rB", IntMulHW,
[(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"subf", "$rT, $rA, $rB", IntGeneral,
[(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"subfc", "$rT, $rA, $rB", IntGeneral,
[(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
"neg", "$rT, $rA", IntSimple,
[(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY] in {
defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"adde", "$rT, $rA, $rB", IntGeneral,
[(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
"addme", "$rT, $rA", IntGeneral,
[(set i32:$rT, (adde i32:$rA, -1))]>;
defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
"addze", "$rT, $rA", IntGeneral,
[(set i32:$rT, (adde i32:$rA, 0))]>;
defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"subfe", "$rT, $rA, $rB", IntGeneral,
[(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
"subfme", "$rT, $rA", IntGeneral,
[(set i32:$rT, (sube -1, i32:$rA))]>;
defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
"subfze", "$rT, $rA", IntGeneral,
[(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
// A-Form instructions. Most of the instructions executed in the FPU are of
// this type.
//
let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
let Uses = [RM] in {
defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FMADDS : AForm_1r<59, 29,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
defm FMSUB : AForm_1r<63, 28,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT,
(fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
defm FMSUBS : AForm_1r<59, 28,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT,
(fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
defm FNMADD : AForm_1r<63, 31,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT,
(fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
defm FNMADDS : AForm_1r<59, 31,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT,
(fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
defm FNMSUB : AForm_1r<63, 30,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
defm FNMSUBS : AForm_1r<59, 30,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
}
// FSEL is artificially split into 4- and 8-byte forms for the result. To avoid
// having four variants, the comparison is forced to always be an 8-byte double
// (code should use an FMRSD if the input comparison value really wants to be a
// float), leaving 4/8-byte forms only for the result and operand types.
let Interpretation64Bit = 1 in
defm FSELD : AForm_1r<63, 23,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FSELS : AForm_1r<63, 23,
(outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
defm FADD : AForm_2r<63, 21,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fadd", "$FRT, $FRA, $FRB", FPAddSub,
[(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
defm FADDS : AForm_2r<59, 21,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fadds", "$FRT, $FRA, $FRB", FPGeneral,
[(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
defm FDIV : AForm_2r<63, 18,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fdiv", "$FRT, $FRA, $FRB", FPDivD,
[(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
defm FDIVS : AForm_2r<59, 18,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fdivs", "$FRT, $FRA, $FRB", FPDivS,
[(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
defm FMUL : AForm_3r<63, 25,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
"fmul", "$FRT, $FRA, $FRC", FPFused,
[(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
defm FMULS : AForm_3r<59, 25,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
"fmuls", "$FRT, $FRA, $FRC", FPGeneral,
[(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
defm FSUB : AForm_2r<63, 20,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fsub", "$FRT, $FRA, $FRB", FPAddSub,
[(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
defm FSUBS : AForm_2r<59, 20,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fsubs", "$FRT, $FRA, $FRB", FPGeneral,
[(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
let neverHasSideEffects = 1 in {
let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
(outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
let PPC970_Unit = 1 in { // FXU Operations.
// M-Form instructions. Rotate and mask instructions.
//
let isCommutable = 1 in {
// RLWIMI can be commuted if the rotate amount is zero.
defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
(ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate,
[]>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
}
let BaseName = "rlwinm" in {
def RLWINM : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>, RecFormRel;
let Defs = [CR0] in
def RLWINMo : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
"rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
}
defm RLWNM : MForm_2r<23, (outs gprc:$rA),
(ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
"rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral,
[]>;
}
} // neverHasSideEffects = 1
//===----------------------------------------------------------------------===//
// PowerPC Instruction Patterns
//
// Arbitrary immediate support. Implement in terms of LIS/ORI.
def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Implement the 'not' operation with the NOR instruction.
def NOT : Pat<(not i32:$in),
(NOR $in, $in)>;
// ADD an arbitrary immediate.
def : Pat<(add i32:$in, imm:$imm),
(ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
// OR an arbitrary immediate.
def : Pat<(or i32:$in, imm:$imm),
(ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// XOR an arbitrary immediate.
def : Pat<(xor i32:$in, imm:$imm),
(XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// SUBFIC
def : Pat<(sub imm32SExt16:$imm, i32:$in),
(SUBFIC $in, imm:$imm)>;
// SHL/SRL
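// A left shift by n is rlwinm rA, rS, n, 0, 31-n; a logical right shift by n
// is rlwinm rA, rS, 32-n, n, 31. The SHL32/SRL32 transforms compute the
// complementary amounts from the shift immediate.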
def : Pat<(shl i32:$in, (i32 imm:$imm)),
(RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
def : Pat<(srl i32:$in, (i32 imm:$imm)),
(RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
// ROTL
def : Pat<(rotl i32:$in, i32:$sh),
(RLWNM $in, $sh, 0, 31)>;
def : Pat<(rotl i32:$in, (i32 imm:$imm)),
(RLWINM $in, imm:$imm, 0, 31)>;
// RLWNM
def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
(RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
// Calls
def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
(BL tglobaladdr:$dst)>;
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
(TCRETURNdi texternalsym:$dst, imm:$imm)>;
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
// Hi and Lo for Darwin Global Addresses.
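// lis materializes the high 16 bits of a symbol address and li the low 16
// bits; the add patterns below fold the high half into addis.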
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
(ADDIS $in, tglobaltlsaddr:$g)>;
def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
(ADDI $in, tglobaltlsaddr:$g)>;
def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
(ADDIS $in, tglobaladdr:$g)>;
def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
(ADDIS $in, tconstpool:$g)>;
def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
(ADDIS $in, tjumptable:$g)>;
def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
def : Pat<(sra i32:$rS, i32:$rB),
(SRAW $rS, $rB)>;
def : Pat<(srl i32:$rS, i32:$rB),
(SRW $rS, $rB)>;
def : Pat<(shl i32:$rS, i32:$rB),
(SLW $rS, $rB)>;
def : Pat<(zextloadi1 iaddr:$src),
(LBZ iaddr:$src)>;
def : Pat<(zextloadi1 xaddr:$src),
(LBZX xaddr:$src)>;
def : Pat<(extloadi1 iaddr:$src),
(LBZ iaddr:$src)>;
def : Pat<(extloadi1 xaddr:$src),
(LBZX xaddr:$src)>;
def : Pat<(extloadi8 iaddr:$src),
(LBZ iaddr:$src)>;
def : Pat<(extloadi8 xaddr:$src),
(LBZX xaddr:$src)>;
def : Pat<(extloadi16 iaddr:$src),
(LHZ iaddr:$src)>;
def : Pat<(extloadi16 xaddr:$src),
(LHZX xaddr:$src)>;
def : Pat<(f64 (extloadf32 iaddr:$src)),
(COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
(FNMSUB $A, $C, $B)>;
def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
(FNMSUB $A, $C, $B)>;
def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
(FNMSUBS $A, $C, $B)>;
def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
(FNMSUBS $A, $C, $B)>;
// FCOPYSIGN's operand types need not agree.
def : Pat<(fcopysign f64:$frB, f32:$frA),
(FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
def : Pat<(fcopysign f32:$frB, f64:$frA),
(FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
//===----------------------------------------------------------------------===//
// PowerPC Instructions used for assembler/disassembler only
//
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
"isync", SprISYNC, []>;
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", LdStICBI, []>;
def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
"eieio", LdStLoad, []>;
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
"wait $L", LdStLoad, []>;
def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsr $RS, $L", SprMTMSR>;
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
"mfmsr $RT", SprMFMSR, []>;
def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsrd $RS, $L", SprMTMSRD>;
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
"slbie $RB", SprSLBIE, []>;
def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
"slbmte $RS, $RB", SprSLBMTE, []>;
def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
"slbmfee $RT, $RB", SprSLBMFEE, []>;
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>;
def TLBSYNC : XForm_0<31, 566, (outs), (ins),
"tlbsync", SprTLBSYNC, []>;
def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
"tlbiel $RB", SprTLBIEL, []>;
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
"tlbie $RB,$RS", SprTLBIE, []>;
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
// Pseudo-instructions for alternate assembly syntax (never used by codegen).
// These are aliases that require C++ handling to convert to the target
// instruction, while InstAliases can be handled directly by tblgen.
class PPCAsmPseudo<string asm, dag iops>
: Instruction {
let Namespace = "PPC";
bit PPC64 = 0; // Default value, override with isPPC64
let OutOperandList = (outs);
let InOperandList = iops;
let Pattern = [];
let AsmString = asm;
let isAsmParserOnly = 1;
let isPseudo = 1;
}
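// Usage sketch (hypothetical mnemonic): a pseudo such as
//   def FOO : PPCAsmPseudo<"foo $rA", (ins gprc:$rA)>;
// is matched only by the asm parser and must be rewritten to a real
// instruction in C++, whereas the InstAliases below expand via tblgen.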
def : InstAlias<"sc", (SC 0)>;
-def : InstAlias<"sync", (SYNC 0)>;
-def : InstAlias<"msync", (SYNC 0)>;
-def : InstAlias<"lwsync", (SYNC 1)>;
-def : InstAlias<"ptesync", (SYNC 2)>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
def : InstAlias<"wait", (WAIT 0)>;
def : InstAlias<"waitrsv", (WAIT 1)>;
def : InstAlias<"waitimpl", (WAIT 2)>;
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
def : InstAlias<"xnop", (XORI R0, R0, 0)>;
def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
// These generic branch instruction forms are used by the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
let PPC970_Unit = 7 in {
let Defs = [CTR], Uses = [CTR, RM] in {
def gBC : BForm_3<16, 0, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
"bc $bo, $bi, $dst">;
def gBCA : BForm_3<16, 1, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
"bca $bo, $bi, $dst">;
}
let Defs = [LR, CTR], Uses = [CTR, RM] in {
def gBCL : BForm_3<16, 0, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
"bcl $bo, $bi, $dst">;
def gBCLA : BForm_3<16, 1, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
"bcla $bo, $bi, $dst">;
}
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCLR : XLForm_2<19, 16, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
"bclr $bo, $bi, $bh", BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCLRL : XLForm_2<19, 16, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
"bclrl $bo, $bi, $bh", BrB, []>;
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCCTR : XLForm_2<19, 528, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
"bcctr $bo, $bi, $bh", BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCCTRL : XLForm_2<19, 528, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
"bcctrl $bo, $bi, $bh", BrB, []>;
}
def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>;
multiclass BranchSimpleMnemonic1<string name, string pm, int bo> {
def : InstAlias<"b"#name#pm#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>;
def : InstAlias<"b"#name#"a"#pm#" $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lr"#pm#" $bi", (gBCLR bo, crbitrc:$bi, 0)>;
def : InstAlias<"b"#name#"l"#pm#" $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>;
def : InstAlias<"b"#name#"la"#pm#" $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lrl"#pm#" $bi", (gBCLRL bo, crbitrc:$bi, 0)>;
}
multiclass BranchSimpleMnemonic2<string name, string pm, int bo>
: BranchSimpleMnemonic1<name, pm, bo> {
def : InstAlias<"b"#name#"ctr"#pm#" $bi", (gBCCTR bo, crbitrc:$bi, 0)>;
def : InstAlias<"b"#name#"ctrl"#pm#" $bi", (gBCCTRL bo, crbitrc:$bi, 0)>;
}
defm : BranchSimpleMnemonic2<"t", "", 12>;
defm : BranchSimpleMnemonic2<"f", "", 4>;
defm : BranchSimpleMnemonic2<"t", "-", 14>;
defm : BranchSimpleMnemonic2<"f", "-", 6>;
defm : BranchSimpleMnemonic2<"t", "+", 15>;
defm : BranchSimpleMnemonic2<"f", "+", 7>;
defm : BranchSimpleMnemonic1<"dnzt", "", 8>;
defm : BranchSimpleMnemonic1<"dnzf", "", 0>;
defm : BranchSimpleMnemonic1<"dzt", "", 10>;
defm : BranchSimpleMnemonic1<"dzf", "", 2>;
multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
def : InstAlias<"b"#name#pm#" $cc, $dst",
(BCC bibo, crrc:$cc, condbrtarget:$dst)>;
def : InstAlias<"b"#name#pm#" $dst",
(BCC bibo, CR0, condbrtarget:$dst)>;
def : InstAlias<"b"#name#"a"#pm#" $cc, $dst",
(BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"a"#pm#" $dst",
(BCCA bibo, CR0, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lr"#pm#" $cc",
(BCLR bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"lr"#pm,
(BCLR bibo, CR0)>;
def : InstAlias<"b"#name#"ctr"#pm#" $cc",
(BCCTR bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"ctr"#pm,
(BCCTR bibo, CR0)>;
def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
(BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
def : InstAlias<"b"#name#"l"#pm#" $dst",
(BCCL bibo, CR0, condbrtarget:$dst)>;
def : InstAlias<"b"#name#"la"#pm#" $cc, $dst",
(BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"la"#pm#" $dst",
(BCCLA bibo, CR0, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lrl"#pm#" $cc",
(BCLRL bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"lrl"#pm,
(BCLRL bibo, CR0)>;
def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
(BCCTRL bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"ctrl"#pm,
(BCCTRL bibo, CR0)>;
}
multiclass BranchExtendedMnemonic<string name, int bibo> {
defm : BranchExtendedMnemonicPM<name, "", bibo>;
defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>;
defm : BranchExtendedMnemonicPM<name, "+", !add(bibo, 3)>;
}
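// The bibo values used below pack the CR-bit index into the bits above
// a 5-bit BO code (sketch): lt = (0<<5)|12, gt = (1<<5)|12 = 44,
// eq = (2<<5)|12 = 76, un/so = (3<<5)|12 = 108, while ge/nl/le/ng/ne
// use the bit-clear BO 4; !add(bibo, 2)/!add(bibo, 3) set the "-"/"+"
// prediction-hint bits.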
defm : BranchExtendedMnemonic<"lt", 12>;
defm : BranchExtendedMnemonic<"gt", 44>;
defm : BranchExtendedMnemonic<"eq", 76>;
defm : BranchExtendedMnemonic<"un", 108>;
defm : BranchExtendedMnemonic<"so", 108>;
defm : BranchExtendedMnemonic<"ge", 4>;
defm : BranchExtendedMnemonic<"nl", 4>;
defm : BranchExtendedMnemonic<"le", 36>;
defm : BranchExtendedMnemonic<"ng", 36>;
defm : BranchExtendedMnemonic<"ne", 68>;
defm : BranchExtendedMnemonic<"nu", 100>;
defm : BranchExtendedMnemonic<"ns", 100>;
def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>;
def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>;
def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>;
def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>;
def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
multiclass TrapExtendedMnemonic<string name, int to> {
def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>;
}
defm : TrapExtendedMnemonic<"lt", 16>;
defm : TrapExtendedMnemonic<"le", 20>;
defm : TrapExtendedMnemonic<"eq", 4>;
defm : TrapExtendedMnemonic<"ge", 12>;
defm : TrapExtendedMnemonic<"gt", 8>;
defm : TrapExtendedMnemonic<"nl", 12>;
defm : TrapExtendedMnemonic<"ne", 24>;
defm : TrapExtendedMnemonic<"ng", 20>;
defm : TrapExtendedMnemonic<"llt", 2>;
defm : TrapExtendedMnemonic<"lle", 6>;
defm : TrapExtendedMnemonic<"lge", 5>;
defm : TrapExtendedMnemonic<"lgt", 1>;
defm : TrapExtendedMnemonic<"lnl", 5>;
defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>;
Index: head/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td (revision 265925)
@@ -1,236 +1,248 @@
//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
let Namespace = "PPC" in {
def sub_lt : SubRegIndex<1>;
def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
}
class PPCReg<string n> : Register<n> {
let Namespace = "PPC";
}
// We identify all our registers with a 5-bit ID, for consistency's sake.
// GPR - One of the 32 32-bit general-purpose registers
class GPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
// GP8 - One of the 32 64-bit general-purpose registers
class GP8<GPR SubReg, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
class SPR<bits<10> num, string n> : PPCReg<n> {
let HWEncoding{9-0} = num;
}
// FPR - One of the 32 64-bit floating-point registers
class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
// VR - One of the 32 128-bit vector registers
class VR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// CRBIT - One of the 32 1-bit condition register fields
class CRBIT<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
// General-purpose registers
foreach Index = 0-31 in {
def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
}
// 64-bit General-purpose registers
foreach Index = 0-31 in {
def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
DwarfRegNum<[Index, -2]>;
}
// Floating-point registers
foreach Index = 0-31 in {
def F#Index : FPR<Index, "f"#Index>,
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<Index, "v"#Index>,
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">;
def ZERO8 : GP8<ZERO, "0">;
// Representations of the frame pointer used by ISD::FRAMEADDR.
def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
def FP8 : GP8<FP, "**FRAME POINTER**">;
// Representations of the base pointer used by setjmp.
def BP : GPR<0 /* arbitrary */, "**BASE POINTER**">;
def BP8 : GP8<BP, "**BASE POINTER**">;
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
def CR0GT : CRBIT< 1, "1">;
def CR0EQ : CRBIT< 2, "2">;
def CR0UN : CRBIT< 3, "3">;
def CR1LT : CRBIT< 4, "4">;
def CR1GT : CRBIT< 5, "5">;
def CR1EQ : CRBIT< 6, "6">;
def CR1UN : CRBIT< 7, "7">;
def CR2LT : CRBIT< 8, "8">;
def CR2GT : CRBIT< 9, "9">;
def CR2EQ : CRBIT<10, "10">;
def CR2UN : CRBIT<11, "11">;
def CR3LT : CRBIT<12, "12">;
def CR3GT : CRBIT<13, "13">;
def CR3EQ : CRBIT<14, "14">;
def CR3UN : CRBIT<15, "15">;
def CR4LT : CRBIT<16, "16">;
def CR4GT : CRBIT<17, "17">;
def CR4EQ : CRBIT<18, "18">;
def CR4UN : CRBIT<19, "19">;
def CR5LT : CRBIT<20, "20">;
def CR5GT : CRBIT<21, "21">;
def CR5EQ : CRBIT<22, "22">;
def CR5UN : CRBIT<23, "23">;
def CR6LT : CRBIT<24, "24">;
def CR6GT : CRBIT<25, "25">;
def CR6EQ : CRBIT<26, "26">;
def CR6UN : CRBIT<27, "27">;
def CR7LT : CRBIT<28, "28">;
def CR7GT : CRBIT<29, "29">;
def CR7EQ : CRBIT<30, "30">;
def CR7UN : CRBIT<31, "31">;
// Condition registers
let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
}
+// The full condition-code register. This is not modeled fully, but is
+// defined here primarily for gcc compatibility, so that the inline asm
+// "cc" clobber specification works.
+def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
+ let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
+}
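+//
+// Example (sketch) of what the "cc" clobber enables in C source:
+//   __asm__ volatile ("cmpw 0, %0, %1" :: "r"(x), "r"(y) : "cc");
+// (x and y here are hypothetical operands.)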
+
// Link register
def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
//let Aliases = [LR] in
def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
// Count register
def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
// VRsave register
def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
// Carry bit. In the architecture this is really bit 0 of the XER register
// (which really is SPR register 1); this is the only bit interesting to a
// compiler.
def CARRY: SPR<1, "ca">;
// FP rounding mode: bits 30 and 31 of the FP status and control register
// This is not allocated as a normal register; it appears only in
// Uses and Defs. The ABI says it needs to be preserved by a function,
// but this is not achieved by saving and restoring it as with most
// registers; it has to be done in code. To make this work, all the
// return and call instructions are described as Uses of RM, so
// instructions that do nothing but change RM will not get deleted.
// Also, in the architecture it is not really a SPR; 512 is arbitrary.
def RM: SPR<512, "**ROUNDING MODE**">;
/// Register classes
// Allocate volatiles first
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
R31, R0, R1, FP, BP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
X31, X13, X0, X1, FP8, BP8)>;
// For some instructions r0 is special (representing the value 0 instead of
// the value in the r0 register), and we use these register subclasses to
// prevent r0 from being allocated for use by those instructions.
def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
// allocated non-volatile register with the lowest register number, as FP
// register N is spilled to offset 8 * (32 - N) below the back chain word of the
// previous stack frame. By allocating non-volatiles in reverse order we make
// sure that the Floating-point register save area is always as small as
// possible because there aren't any unused spill slots.
def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
(sequence "F%u", 31, 14))>;
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
def CRBITRC : RegisterClass<"PPC", [i32], 32,
(add CR0LT, CR0GT, CR0EQ, CR0UN,
CR1LT, CR1GT, CR1EQ, CR1UN,
CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
CR4LT, CR4GT, CR4EQ, CR4UN,
CR5LT, CR5GT, CR5EQ, CR5UN,
CR6LT, CR6GT, CR6EQ, CR6UN,
CR7LT, CR7GT, CR7EQ, CR7UN)>
{
let CopyCost = -1;
}
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
let isAllocatable = 0;
}
def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
let isAllocatable = 0;
}
def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
}
+
+def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
+ let isAllocatable = 0;
+}
+
Index: head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h (revision 265925)
@@ -1,226 +1,210 @@
//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "PPCGenSubtargetInfo.inc"
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
namespace llvm {
class StringRef;
namespace PPC {
// -m directive values.
enum {
DIR_NONE,
DIR_32,
DIR_440,
DIR_601,
DIR_602,
DIR_603,
DIR_7400,
DIR_750,
DIR_970,
DIR_A2,
DIR_E500mc,
DIR_E5500,
DIR_PWR3,
DIR_PWR4,
DIR_PWR5,
DIR_PWR5X,
DIR_PWR6,
DIR_PWR6X,
DIR_PWR7,
DIR_64
};
}
class GlobalValue;
class TargetMachine;
class PPCSubtarget : public PPCGenSubtargetInfo {
protected:
/// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
/// Which cpu directive was used.
unsigned DarwinDirective;
/// Used by the ISel to turn on optimizations for POWER4-derived architectures
bool HasMFOCRF;
bool Has64BitSupport;
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
bool HasQPX;
bool HasVSX;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
bool HasRecipPrec;
bool HasSTFIWX;
bool HasLFIWAX;
bool HasFPRND;
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
bool DeprecatedMFTB;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
bool IsLittleEndian;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void SetJITMode();
/// getStackAlignment - Returns the minimum alignment known to hold for the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
unsigned getStackAlignment() const { return StackAlignment; }
/// getDarwinDirective - Returns the -m directive specified for the cpu.
///
unsigned getDarwinDirective() const { return DarwinDirective; }
/// getInstrItineraryData - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- /// getDataLayoutString - Return the pointer size and type alignment
- /// properties of this subtarget.
- const char *getDataLayoutString() const {
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (isPPC64() && isSVR4ABI()) {
- if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
- return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64";
- else
- return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
- }
-
- return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
- : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
- }
-
/// \brief Reset the features for the PowerPC target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
private:
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
///
bool isPPC64() const { return IsPPC64; }
/// has64BitSupport - Return true if the selected CPU supports 64-bit
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; }
/// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
/// registers in 32-bit mode when possible. This can only be true if
/// has64BitSupport() returns true.
bool use64BitRegs() const { return Use64BitRegs; }
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
// isJITCodeModel - True if we're generating code for the JIT
bool isJITCodeModel() const { return IsJITCodeModel; }
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
// Specific obvious features.
bool hasFCPSGN() const { return HasFCPSGN; }
bool hasFSQRT() const { return HasFSQRT; }
bool hasFRE() const { return HasFRE; }
bool hasFRES() const { return HasFRES; }
bool hasFRSQRTE() const { return HasFRSQRTE; }
bool hasFRSQRTES() const { return HasFRSQRTES; }
bool hasRecipPrec() const { return HasRecipPrec; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasLFIWAX() const { return HasLFIWAX; }
bool hasFPRND() const { return HasFPRND; }
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
const Triple &getTargetTriple() const { return TargetTriple; }
/// isDarwin - True if this is any darwin platform.
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
// Scheduling customization.
bool enableMachineScheduler() const;
void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
unsigned NumRegionInstrs) const;
bool useAA() const;
};
} // End llvm namespace
#endif
Index: head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (revision 265925)
@@ -1,169 +1,206 @@
//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the PowerPC target.
//
//===----------------------------------------------------------------------===//
#include "PPCTargetMachine.h"
#include "PPC.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ // PPC is big endian
+ std::string Ret = "E";
+
+ // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers.
+ if (ST.isPPC64())
+ Ret += "-p:64:64";
+ else
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-f64:64:64-i64:64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // Set support for 128-bit floats depending on the ABI.
+ if (!ST.isPPC64() && ST.isSVR4ABI())
+ Ret += "-f128:64:128";
+
+ // Some ABIs support 128-bit vectors.
+ if (ST.isPPC64() && ST.isSVR4ABI())
+ Ret += "-v128:128:128";
+
+ // PPC64 has 32- and 64-bit registers, PPC32 has only 32-bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
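+// For example (sketch): a 64-bit SVR4 subtarget produces
+//   "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64"
+// and a 32-bit SVR4 subtarget produces
+//   "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32",
+// matching the FreeBSD and 32-bit strings previously hard-coded in
+// PPCSubtarget.h.
+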
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
- DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
+ DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
setMCUseCFI(false);
initAsmInfo();
}
void PPC32TargetMachine::anchor() { }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
void PPC64TargetMachine::anchor() { }
PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
namespace {
/// PPC Code Generator Pass Configuration Options.
class PPCPassConfig : public TargetPassConfig {
public:
PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
PPCTargetMachine &getPPCTargetMachine() const {
return getTM<PPCTargetMachine>();
}
const PPCSubtarget &getPPCSubtarget() const {
return *getPPCTargetMachine().getSubtargetImpl();
}
virtual bool addPreISel();
virtual bool addILPOpts();
virtual bool addInstSelector();
virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
}
bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
return false;
}
bool PPCPassConfig::addILPOpts() {
if (getPPCSubtarget().hasISEL()) {
addPass(&EarlyIfConverterID);
return true;
}
return false;
}
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createPPCISelDag(getPPCTargetMachine()));
#ifndef NDEBUG
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoopsVerify());
#endif
return false;
}
bool PPCPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
return true;
}
bool PPCPassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass());
// Must run branch selection immediately preceding the asm printer.
addPass(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
Subtarget.SetJITMode();
// Machine code emitter pass for PowerPC.
PM.add(createPPCJITCodeEmitterPass(*this, JCE));
return false;
}
void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our PPC pass. This
// allows the PPC pass to delegate to the target independent layer when
// appropriate.
PM.add(createBasicTargetTransformInfoPass(this));
PM.add(createPPCTargetTransformInfoPass(this));
}
Index: head/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp (revision 265925)
@@ -1,835 +1,837 @@
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
#include "AMDGPUGenCallingConv.inc"
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Initialize target lowering borrowed from AMDIL
InitAMDILLowering();
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FEXP2, MVT::f32, Legal);
setOperationAction(ISD::FPOW, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Legal);
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
// The hardware supports ROTR, but not ROTL
setOperationAction(ISD::ROTL, MVT::i32, Expand);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v8f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v16f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
// Custom lowering of vector stores is required for local address space
// stores.
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
// XXX: Native v2i32 local address space stores are possible, but not
// currently implemented.
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
// XXX: This can be changed to Custom, once ExpandVectorStores can
// handle 64-bit stores.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setOperationAction(ISD::LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
static const MVT::SimpleValueType IntTypes[] = {
MVT::v2i32, MVT::v4i32
};
const size_t NumIntTypes = array_lengthof(IntTypes);
for (unsigned int x = 0; x < NumIntTypes; ++x) {
MVT::SimpleValueType VT = IntTypes[x];
// Expand the following operations for the current type by default
setOperationAction(ISD::ADD, VT, Expand);
setOperationAction(ISD::AND, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::MUL, VT, Expand);
setOperationAction(ISD::OR, VT, Expand);
setOperationAction(ISD::SHL, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SRL, VT, Expand);
setOperationAction(ISD::SRA, VT, Expand);
setOperationAction(ISD::SUB, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::XOR, VT, Expand);
}
static const MVT::SimpleValueType FloatTypes[] = {
MVT::v2f32, MVT::v4f32
};
const size_t NumFloatTypes = array_lengthof(FloatTypes);
for (unsigned int x = 0; x < NumFloatTypes; ++x) {
MVT::SimpleValueType VT = FloatTypes[x];
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
}
}
//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//
MVT AMDGPUTargetLowering::getVectorIdxTy() const {
return MVT::i32;
}
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
return true;
unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
return ((LScalarSize <= CastScalarSize) ||
(CastScalarSize >= 32) ||
(LScalarSize < 32));
}
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
return VT == MVT::f32;
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
assert(VT.isFloatingPoint());
return VT == MVT::f32;
}
//===---------------------------------------------------------------------===//
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc DL, SelectionDAG &DAG) const {
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}
//===---------------------------------------------------------------------===//
// Target specific lowering
//===---------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
assert(0 && "Custom lowering code for this"
"instruction is not implemented yet!");
break;
// AMDIL DAG lowering
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
}
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
const DataLayout *TD = getTargetMachine().getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
const GlobalValue *GV = G->getGlobal();
unsigned Offset;
if (MFI->LocalMemoryObjects.count(GV) == 0) {
uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
Offset = MFI->LDSSize;
MFI->LocalMemoryObjects[GV] = Offset;
// XXX: Account for alignment?
MFI->LDSSize += Size;
} else {
Offset = MFI->LocalMemoryObjects[GV];
}
return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
}
void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Args,
unsigned Start,
unsigned Count) const {
EVT VT = Op.getValueType();
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
VT.getVectorElementType(),
Op, DAG.getConstant(i, MVT::i32)));
}
}
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
ExtractVectorElements(A, DAG, Args, 0,
A.getValueType().getVectorNumElements());
ExtractVectorElements(B, DAG, Args, 0,
B.getValueType().getVectorNumElements());
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
&Args[0], Args.size());
}
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
EVT VT = Op.getValueType();
unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
VT.getVectorNumElements());
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
&Args[0], Args.size());
}
SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
assert(FIN);
unsigned FrameIndex = FIN->getIndex();
unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
Op.getValueType());
}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc DL(Op);
EVT VT = Op.getValueType();
switch (IntrinsicID) {
default: return Op;
case AMDGPUIntrinsic::AMDIL_abs:
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDIL_exp:
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDIL_fraction:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDIL_max:
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDIL_min:
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umin:
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDIL_round_nearest:
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
}
}
/// IABS(a) = SMAX(sub(0, a), a)
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
Op.getOperand(1));
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}
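// For example (sketch): IABS(-5) = SMAX(0 - (-5), -5) = SMAX(5, -5) = 5.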
/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, MVT::f32),
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
return DAG.getNode(ISD::FADD, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
OneSubAC);
}
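// For example (sketch): LRP(0.25, 8.0, 4.0) = 0.25*8 + (1 - 0.25)*4
// = 2 + 3 = 5, i.e. a blend 25% of the way from c toward b.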
/// \brief Generate Min/Max node
SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
if (VT != MVT::f32 ||
!((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
return SDValue();
}
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
case ISD::SETOEQ:
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
case ISD::SETUEQ:
case ISD::SETEQ:
case ISD::SETFALSE:
case ISD::SETFALSE2:
case ISD::SETTRUE:
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
assert(0 && "Operation should already be optimised !");
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
else
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGE:
case ISD::SETOGE:
case ISD::SETUGT:
case ISD::SETOGT: {
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
else
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
}
case ISD::SETCC_INVALID:
assert(0 && "Invalid setcc condcode !");
}
return Op;
}
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op); // Op is known to be a load here.
EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
EVT EltVT = Op.getValueType().getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
SmallVector<SDValue, 8> Loads;
SDLoc SL(Op);
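// Example (sketch): a v4i32 load becomes four scalar loads at
// BasePtr + {0, 4, 8, 12}, recombined below with BUILD_VECTOR.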
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
Load->getChain(), Ptr,
MachinePointerInfo(Load->getMemOperand()->getValue()),
MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
Load->getAlignment()));
}
return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
Loads.size());
}
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op); // Op is known to be a store here.
EVT MemVT = Store->getMemoryVT();
unsigned MemBits = MemVT.getSizeInBits();
// Byte stores are really expensive, so if possible, try to pack a
// 32-bit vector truncating store into an i32 store.
// XXX: We could also optimize other vector bitwidths.
if (!MemVT.isVector() || MemBits > 32) {
return SDValue();
}
SDLoc DL(Op);
const SDValue &Value = Store->getValue();
EVT VT = Value.getValueType();
const SDValue &Ptr = Store->getBasePtr();
EVT MemEltVT = MemVT.getVectorElementType();
unsigned MemEltBits = MemEltVT.getSizeInBits();
unsigned MemNumElements = MemVT.getVectorNumElements();
EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
SDValue Mask;
switch(MemEltBits) {
case 8:
Mask = DAG.getConstant(0xFF, PackedVT);
break;
case 16:
Mask = DAG.getConstant(0xFFFF, PackedVT);
break;
default:
llvm_unreachable("Cannot lower this vector store");
}
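// Example (sketch): storing v4i8 <1, 2, 3, 4> packs to the i32
// 0x04030201 (element i shifted left by 8 * i) and is emitted as a
// single 32-bit store.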
SDValue PackedValue;
for (unsigned i = 0; i < MemNumElements; ++i) {
EVT ElemVT = VT.getVectorElementType();
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
DAG.getConstant(i, MVT::i32));
Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
if (i == 0) {
PackedValue = Elt;
} else {
PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
}
}
return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
MachinePointerInfo(Store->getMemOperand()->getValue()),
Store->isVolatile(), Store->isNonTemporal(),
Store->getAlignment());
}
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
EVT EltVT = Store->getValue().getValueType().getVectorElementType();
EVT PtrVT = Store->getBasePtr().getValueType();
unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
SDLoc SL(Op);
SmallVector<SDValue, 8> Chains;
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Store->getValue(), DAG.getConstant(i, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
Store->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
PtrVT));
Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
MachinePointerInfo(Store->getMemOperand()->getValue()),
MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
Store->getAlignment()));
}
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
}
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
if (Result.getNode()) {
return Result;
}
StoreSDNode *Store = cast<StoreSDNode>(Op);
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
return SplitVectorStore(Op, DAG);
}
return SDValue();
}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
SmallVector<SDValue, 8> Results;
// RCP = URECIP(Den) = 2^32 / Den + e
// e is the rounding error.
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
// RCP_LO = umulo(RCP, Den)
SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
// RCP_HI = mulhu(RCP, Den)
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
// NEG_RCP_LO = -RCP_LO
SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
RCP_LO);
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
NEG_RCP_LO, RCP_LO,
ISD::SETEQ);
// Calculate the rounding error from the URECIP instruction
// E = mulhu(ABS_RCP_LO, RCP)
SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
// RCP_A_E = RCP + E
SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
// RCP_S_E = RCP - E
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
RCP_A_E, RCP_S_E,
ISD::SETEQ);
// Quotient = mulhu(Tmp0, Num)
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
// Num_S_Remainder = Quotient * Den
SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
// Remainder = Num - Num_S_Remainder
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
ISD::SETUGE);
// Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
Num_S_Remainder,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
ISD::SETUGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
Remainder_GE_Zero);
// Calculate Division result:
// Quotient_A_One = Quotient + 1
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
DAG.getConstant(1, VT));
// Quotient_S_One = Quotient - 1
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
DAG.getConstant(1, VT));
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
Quotient, Quotient_A_One, ISD::SETEQ);
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Quotient_S_One, Div, ISD::SETEQ);
// Calculate Rem result:
// Remainder_S_Den = Remainder - Den
SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
// Remainder_A_Den = Remainder + Den
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
Remainder, Remainder_S_Den, ISD::SETEQ);
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Remainder_A_Den, Rem, ISD::SETEQ);
SDValue Ops[2];
Ops[0] = Div;
Ops[1] = Rem;
return DAG.getMergeValues(Ops, 2, DL);
}
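// Editor's illustrative reference (hypothetical, not part of this file;
// assumes <cstdint>): the node sequence above is an expansion of ordinary
// 32-bit unsigned divmod; the +1/-1 and +/-Den selects correct the result
// for the rounding error e introduced by URECIP.
static inline void udivrem32Ref(uint32_t Num, uint32_t Den,
                                uint32_t &Div, uint32_t &Rem) {
  Div = Num / Den;       // Quotient after the Quotient_A/S_One corrections
  Rem = Num - Div * Den; // Remainder after the Remainder_A/S_Den corrections
}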
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue S0 = Op.getOperand(0);
SDLoc DL(Op);
if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
return SDValue();
// f32 uint_to_fp i64
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(0, MVT::i32));
SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(1, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
}
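// Editor's illustrative sketch (hypothetical, not part of this file;
// assumes <cstdint>): the lowering above is the DAG form of converting each
// 32-bit half separately and recombining with a scaled add.
static inline float uint64ToF32Ref(uint64_t V) {
  float Lo = (float)(uint32_t)V;         // EXTRACT_ELEMENT 0 + UINT_TO_FP
  float Hi = (float)(uint32_t)(V >> 32); // EXTRACT_ELEMENT 1 + UINT_TO_FP
  return Lo + Hi * 4294967296.0f;        // FMUL by 2^32, then FADD
}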
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::getOriginalFunctionArgs(
SelectionDAG &DAG,
const Function *F,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<ISD::InputArg> &OrigIns) const {
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
if (Ins[i].ArgVT == Ins[i].VT) {
OrigIns.push_back(Ins[i]);
continue;
}
EVT VT;
if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
// Vector has been split into scalars.
VT = Ins[i].ArgVT.getVectorElementType();
} else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
Ins[i].ArgVT.getVectorElementType() !=
Ins[i].VT.getVectorElementType()) {
// Vector elements have been promoted
VT = Ins[i].ArgVT;
} else {
// Vector has been split into smaller vectors.
VT = Ins[i].VT;
}
ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
Ins[i].OrigArgIndex, Ins[i].PartOffset);
OrigIns.push_back(Arg);
}
}
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
return C->isAllOnesValue();
}
return false;
}
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->getValueAPF().isZero();
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
return C->isNullValue();
}
return false;
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned VirtualRegister;
if (!MRI.isLiveIn(Reg)) {
VirtualRegister = MRI.createVirtualRegister(RC);
MRI.addLiveIn(Reg, VirtualRegister);
} else {
VirtualRegister = MRI.getLiveInVirtReg(Reg);
}
return DAG.getRegister(VirtualRegister, VT);
}
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
// AMDIL DAG nodes
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(DIV_INF);
NODE_NAME_CASE(RET_FLAG);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(LOAD_INPUT)
NODE_NAME_CASE(SAMPLE)
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
}
Index: head/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
===================================================================
--- head/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td (revision 265925)
@@ -1,419 +1,426 @@
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
field bit isRegisterLoad = 0;
field bit isRegisterStore = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;
let Pattern = pattern;
let Itinerary = NullALU;
let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;
}
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
: AMDGPUInst<outs, ins, asm, pattern> {
field bits<32> Inst = 0xffffffff;
}
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
def COND_OEQ : PatLeaf <
(cond),
[{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;
def COND_OGT : PatLeaf <
(cond),
[{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;
def COND_OGE : PatLeaf <
(cond),
[{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;
def COND_OLT : PatLeaf <
(cond),
[{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;
def COND_OLE : PatLeaf <
(cond),
[{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;
def COND_UNE : PatLeaf <
(cond),
[{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;
def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for unsigned comparisons
//===----------------------------------------------------------------------===//
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//
def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//
def COND_EQ : PatLeaf <
(cond),
[{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;
def COND_NE : PatLeaf <
(cond),
[{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;
def COND_NULL : PatLeaf <
(cond),
[{return false;}]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
LoadSDNode *L = cast<LoadSDNode>(N);
return L->getExtensionType() == ISD::ZEXTLOAD ||
L->getExtensionType() == ISD::EXTLOAD;
}]>;
def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
def az_extloadi32_global : PatFrag<(ops node:$ptr),
(az_extloadi32 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi32_constant : PatFrag<(ops node:$ptr),
(az_extloadi32 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
(truncstorei8 node:$val, node:$ptr), [{
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
(truncstorei16 node:$val, node:$ptr), [{
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
(truncstorei8 node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
(truncstorei16 node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;
def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
(atomic_load_add node:$ptr, node:$value), [{
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
(atomic_load_sub node:$ptr, node:$value), [{
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
}
def CONST : Constants;
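// Editor's note (not in the upstream file): the values above are IEEE-754
// single-precision bit patterns: 0x40c90fdb = 6.2831855f (2*pi),
// 0x40490fdb = 3.1415927f (pi), 0x3e22f983 = 0.15915494f (1/(2*pi)) and
// 0x4f800000 = 4294967296.0f (2^32).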
def FP_ZERO : PatLeaf <
(fpimm),
[{return N->getValueAPF().isZero();}]
>;
def FP_ONE : PatLeaf <
(fpimm),
[{return N->isExactlyValue(1.0);}]
>;
def U24 : ComplexPattern<i32, 1, "SelectU24", [], []>;
def I24 : ComplexPattern<i32, 1, "SelectI24", [], []>;
let isCodeGenOnly = 1, isPseudo = 1 in {
let usesCustomInserter = 1 in {
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
[(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FABS $dst, $src0",
[(set f32:$dst, (fabs f32:$src0))]
>;
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FNEG $dst, $src0",
[(set f32:$dst, (fneg f32:$src0))]
>;
} // usesCustomInserter = 1
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {
def RegisterLoad : AMDGPUShaderInst <
(outs dstClass:$dst),
(ins addrClass:$addr, i32imm:$chan),
"RegisterLoad $dst, $addr",
[(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
> {
let isRegisterLoad = 1;
}
def RegisterStore : AMDGPUShaderInst <
(outs),
(ins dstClass:$val, addrClass:$addr, i32imm:$chan),
"RegisterStore $val, $addr",
[(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
> {
let isRegisterStore = 1;
}
}
}
} // End isCodeGenOnly = 1, isPseudo = 1
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
: Pat <
(fpow f32:$src0, f32:$src1),
(exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
/* Other helper patterns */
/* --------------------- */
/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
SubRegIndex sub_reg>
: Pat<
(sub_type (vector_extract vec_type:$src, sub_idx)),
(EXTRACT_SUBREG $src, sub_reg)
>;
/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
int sub_idx, SubRegIndex sub_reg>
: Pat <
(vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
(INSERT_SUBREG $vec, $elem, sub_reg)
>;
class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
(vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
(vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
>;
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
(dt (bitconvert (st rc:$src0))),
(dt rc:$src0)
>;
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
(vt (AMDGPUdwordaddr (vt rc:$addr))),
(vt rc:$addr)
>;
// BFI_INT patterns
multiclass BFIPatterns <Instruction BFI_INT> {
// Definition from ISA doc:
// (y & x) | (z & ~x)
def : Pat <
(or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
(BFI_INT $x, $y, $z)
>;
// SHA-256 Ch function
// z ^ (x & (y ^ z))
def : Pat <
(xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
(BFI_INT $x, $y, $z)
>;
}
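// Editor's note (illustrative, not upstream): both patterns above select the
// same bitfield insert. Per bit, the result takes y where x is set and z
// where x is clear; Ch(x,y,z) = z ^ (x & (y ^ z)) flips z's bit to y's
// exactly where x is set, so it equals (y & x) | (z & ~x).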
// SHA-256 Ma patterns
// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
(or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
(BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;
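// Editor's note (illustrative): Ma(x,y,z) = (x & z) | (y & (x | z)) is the
// bitwise majority function. When x == y the result is y, and when x != y it
// is z, which is exactly BFI_INT(x ^ y, z, y) as selected above.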
// Bitfield extract patterns
+/*
+
+XXX: The BFE pattern is not working correctly because the XForm is not being
+applied.
+
def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
class BFEPattern <Instruction BFE> : Pat <
(and (srl i32:$x, legalshift32:$y), bfemask:$z),
(BFE $x, $y, $z)
>;
+
+*/
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
(BIT_ALIGN $src0, $src0, $src1)
>;
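// Editor's note (illustrative, assuming BIT_ALIGN behaves as a funnel shift
// of {src0, src1} by src2): with src0 == src1 == x this yields
// rotr(x, s) = (x >> s) | (x << (32 - s)).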
// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
/*
class UMUL24Pattern <Instruction UMUL24> : Pat <
(mul U24:$x, U24:$y),
(UMUL24 $x, $y)
>;
*/
include "R600Instructions.td"
include "SIInstrInfo.td"
Index: head/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp (revision 265925)
@@ -1,70 +1,68 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasSingleParameterDotFile = false;
- WeakDefDirective = 0;
//===------------------------------------------------------------------===//
HasSubsectionsViaSymbols = true;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
SeparatorString = "\n";
CommentColumn = 40;
CommentString = ";";
LabelSuffix = ":";
GlobalPrefix = "@";
PrivateGlobalPrefix = ";.";
LinkerPrivateGlobalPrefix = "!";
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
AssemblerDialect = 0;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
AsciiDirective = ".ascii\t";
AscizDirective = ".asciz\t";
Data8bitsDirective = ".byte\t";
Data16bitsDirective = ".short\t";
Data32bitsDirective = ".long\t";
Data64bitsDirective = ".quad\t";
GPRel32Directive = 0;
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
HasMicrosoftFastStdCallMangling = false;
//===--- Alignment Information ----------------------------------------===//
AlignDirective = ".align\t";
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
//===--- Global Variable Emission Directives --------------------------===//
GlobalDirective = ".global";
HasSetDirective = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
- LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
}
const MCSection*
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
return 0;
}
Index: head/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp (revision 265925)
@@ -1,525 +1,526 @@
//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets the STACK_SIZE info.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "r600cf"
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
enum ControlFlowInstruction {
CF_TC,
CF_VC,
CF_CALL_FS,
CF_WHILE_LOOP,
CF_END_LOOP,
CF_LOOP_BREAK,
CF_LOOP_CONTINUE,
CF_JUMP,
CF_ELSE,
CF_POP,
CF_END
};
static char ID;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
const AMDGPUSubtarget &ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
return true;
default:
return false;
}
}
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
break;
case CF_VC:
Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
break;
case CF_CALL_FS:
Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
break;
case CF_WHILE_LOOP:
Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
break;
case CF_END_LOOP:
Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
break;
case CF_LOOP_BREAK:
Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
break;
case CF_LOOP_CONTINUE:
Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
break;
case CF_JUMP:
Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
break;
case CF_ELSE:
Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
break;
case CF_POP:
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
break;
case CF_END:
if (ST.hasCaymanISA()) {
Opcode = AMDGPU::CF_END_CM;
break;
}
Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
break;
}
assert (Opcode && "No opcode selected");
return TII->get(Opcode);
}
bool isCompatibleWithClause(const MachineInstr *MI,
std::set<unsigned> &DstRegs) const {
unsigned DstMI = 0, SrcMI = 0; // Zero-init: not every operand sets these.
for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
E = MI->operands_end(); I != E; ++I) {
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
if (MO.isDef()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
DstMI = Reg;
else
DstMI = TRI->getMatchingSuperReg(Reg,
TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
}
if (MO.isUse()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
SrcMI = Reg;
else
SrcMI = TRI->getMatchingSuperReg(Reg,
TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
}
}
if ((DstRegs.find(SrcMI) == DstRegs.end())) {
DstRegs.insert(DstMI);
return true;
} else
return false;
}
ClauseFile
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
const {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0;
bool IsTex = TII->usesTextureCache(ClauseHead);
std::set<unsigned> DstRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I))
continue;
if (AluInstCount >= MaxFetchInst)
break;
if ((IsTex && !TII->usesTextureCache(I)) ||
(!IsTex && !TII->usesVertexCache(I)))
break;
if (!isCompatibleWithClause(I, DstRegs))
break;
AluInstCount ++;
ClauseContent.push_back(I);
}
MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
getHWInstrDesc(IsTex?CF_TC:CF_VC))
.addImm(0) // ADDR
.addImm(AluInstCount - 1); // COUNT
return ClauseFile(MIb, ClauseContent);
}
void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_Z,
AMDGPU::ALU_LITERAL_W
};
const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
TII->getSrcs(MI);
for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
int64_t Imm = Srcs[i].second;
std::vector<int64_t>::iterator It =
std::find(Lits.begin(), Lits.end(), Imm);
if (It != Lits.end()) {
unsigned Index = It - Lits.begin();
Srcs[i].first->setReg(LiteralRegs[Index]);
} else {
assert(Lits.size() < 4 && "Too many literals in Instruction Group");
Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
Lits.push_back(Imm);
}
}
}
MachineBasicBlock::iterator insertLiterals(
MachineBasicBlock::iterator InsertPos,
const std::vector<unsigned> &Literals) const {
MachineBasicBlock *MBB = InsertPos->getParent();
for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
unsigned LiteralPair0 = Literals[i];
unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
TII->get(AMDGPU::LITERALS))
.addImm(LiteralPair0)
.addImm(LiteralPair1);
}
return InsertPos;
}
ClauseFile
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
const {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<MachineInstr *> ClauseContent;
I++;
for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
if (IsTrivialInst(I)) {
++I;
continue;
}
if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
break;
std::vector<int64_t> Literals;
if (I->isBundle()) {
MachineInstr *DeleteMI = I;
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
while (++BI != E && BI->isBundledWithPred()) {
BI->unbundleFromPred();
for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = BI->getOperand(i);
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
getLiteral(BI, Literals);
ClauseContent.push_back(BI);
}
I = BI;
DeleteMI->eraseFromParent();
} else {
getLiteral(I, Literals);
ClauseContent.push_back(I);
I++;
}
for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
unsigned literal0 = Literals[i];
unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
TII->get(AMDGPU::LITERALS))
.addImm(literal0)
.addImm(literal2);
ClauseContent.push_back(MILit);
}
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
return ClauseFile(ClauseHead, ClauseContent);
}
void
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
.addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
CfCount += 2 * Clause.second.size();
}
void
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
.addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
CfCount += Clause.second.size();
}
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}
void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
const {
for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
It != E; ++It) {
MachineInstr *MI = *It;
CounterPropagateAddr(MI, Addr);
}
}
unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
switch (ST.getGeneration()) {
case AMDGPUSubtarget::R600:
case AMDGPUSubtarget::R700:
if (hasPush)
StackSubEntry += 2;
break;
case AMDGPUSubtarget::EVERGREEN:
if (hasPush)
StackSubEntry ++;
case AMDGPUSubtarget::NORTHERN_ISLANDS:
StackSubEntry += 2;
break;
default: llvm_unreachable("Not a VLIW4/VLIW5 GPU");
}
return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
}
public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
TII (0), TRI(0),
ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
MaxFetchInst = ST.getTexVTXClauseSize();
}
virtual bool runOnMachineFunction(MachineFunction &MF) {
TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
unsigned MaxStack = 0;
unsigned CurrentStack = 0;
bool HasPush = false;
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
if (MFI->ShaderType == 1) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
getHWInstrDesc(CF_CALL_FS));
CfCount++;
MaxStack = 1;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
std::vector<MachineInstr *> LastAlu(1);
std::vector<MachineInstr *> ToPopAfter;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
+ LastAlu.back() = 0;
continue;
}
MachineBasicBlock::iterator MI = I;
if (MI->getOpcode() != AMDGPU::ENDIF)
LastAlu.back() = 0;
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack);
HasPush = true;
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
case AMDGPU::WHILELOOP: {
CurrentStack+=4;
MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
.addImm(1);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
LoopStack.push_back(Pair);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::ENDLOOP: {
CurrentStack-=4;
std::pair<unsigned, std::set<MachineInstr *> > Pair =
LoopStack.back();
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
.addImm(Pair.first + 1);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::IF_PREDICATE_SET: {
LastAlu.push_back(0);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
.addImm(0);
IfThenElseStack.push_back(MIb);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::ELSE: {
MachineInstr * JumpInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::ENDIF: {
CurrentStack--;
if (LastAlu.back()) {
ToPopAfter.push_back(LastAlu.back());
} else {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
(void)MIb;
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
CfCount++;
}
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount);
IfOrElseInst->getOperand(1).setImm(1);
LastAlu.pop_back();
MI->eraseFromParent();
break;
}
case AMDGPU::BREAK: {
CfCount ++;
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
break;
}
case AMDGPU::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::RETURN: {
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
CfCount++;
MI->eraseFromParent();
if (CfCount % 2) {
BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
CfCount++;
}
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
EmitFetchClause(I, FetchClauses[i], CfCount);
for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
EmitALUClause(I, AluClauses[i], CfCount);
}
default:
if (TII->isExport(MI->getOpcode())) {
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
}
break;
}
}
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
MachineInstr *Alu = ToPopAfter[i];
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
TII->get(AMDGPU::CF_ALU_POP_AFTER))
.addImm(Alu->getOperand(0).getImm())
.addImm(Alu->getOperand(1).getImm())
.addImm(Alu->getOperand(2).getImm())
.addImm(Alu->getOperand(3).getImm())
.addImm(Alu->getOperand(4).getImm())
.addImm(Alu->getOperand(5).getImm())
.addImm(Alu->getOperand(6).getImm())
.addImm(Alu->getOperand(7).getImm())
.addImm(Alu->getOperand(8).getImm());
Alu->eraseFromParent();
}
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}
return false;
}
const char *getPassName() const {
return "R600 Control Flow Finalizer Pass";
}
};
char R600ControlFlowFinalizer::ID = 0;
} // end anonymous namespace
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
return new R600ControlFlowFinalizer(TM);
}
Index: head/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp (revision 265925)
@@ -1,1389 +1,1395 @@
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
RI(tm),
ST(tm.getSubtarget<AMDGPUSubtarget>())
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
}
bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}
bool R600InstrInfo::isVector(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}
void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
unsigned VectorComponents = 0;
if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
VectorComponents = 4;
} else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
VectorComponents = 2;
}
if (VectorComponents > 0) {
for (unsigned I = 0; I < VectorComponents; I++) {
unsigned SubRegIndex = RI.getSubRegFromChannel(I);
buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
RI.getSubReg(DestReg, SubRegIndex),
RI.getSubReg(SrcReg, SubRegIndex))
.addReg(DestReg,
RegState::Define | RegState::Implicit);
}
} else {
MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
DestReg, SrcReg);
NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
.setIsKill(KillSrc);
}
}
/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
E = MBBI->operands_end(); I != E; ++I) {
if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
return false;
}
return true;
}
unsigned R600InstrInfo::getIEQOpcode() const {
return AMDGPU::SETE_INT;
}
bool R600InstrInfo::isMov(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::MOV:
case AMDGPU::MOV_IMM_F32:
case AMDGPU::MOV_IMM_I32:
return true;
}
}
// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
switch (Opcode) {
default: return false;
case AMDGPU::RETURN:
return true;
}
}
bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
return false;
}
bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::CUBE_r600_pseudo:
case AMDGPU::CUBE_r600_real:
case AMDGPU::CUBE_eg_pseudo:
case AMDGPU::CUBE_eg_real:
return true;
}
}
bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
return (TargetFlags & R600_InstFlag::ALU_INST);
}
bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
return ((TargetFlags & R600_InstFlag::OP1) |
(TargetFlags & R600_InstFlag::OP2) |
(TargetFlags & R600_InstFlag::OP3));
}
bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
return ((TargetFlags & R600_InstFlag::LDS_1A) |
(TargetFlags & R600_InstFlag::LDS_1A1D) |
(TargetFlags & R600_InstFlag::LDS_1A2D));
}
bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
}
bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}
bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
if (isALUInstr(MI->getOpcode()))
return true;
if (isVector(*MI) || isCubeOp(MI->getOpcode()))
return true;
switch (MI->getOpcode()) {
case AMDGPU::PRED_X:
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::COPY:
case AMDGPU::DOT_4:
return true;
default:
return false;
}
}
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
if (ST.hasCaymanISA())
return false;
return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}
bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
return isTransOnly(MI->getOpcode());
}
bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}
bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
return isVectorOnly(MI->getOpcode());
}
bool R600InstrInfo::isExport(unsigned Opcode) const {
return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
return ST.hasVertexCache() && IS_VTX(get(Opcode));
}
bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
}
bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}
bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
usesTextureCache(MI->getOpcode());
}
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
switch (Opcode) {
case AMDGPU::KILLGT:
case AMDGPU::GROUP_BARRIER:
return true;
default:
return false;
}
}
bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}
bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
if (!isALUInstr(MI->getOpcode())) {
return false;
}
for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
E = MI->operands_end(); I != E; ++I) {
if (!I->isReg() || !I->isUse() ||
TargetRegisterInfo::isVirtualRegister(I->getReg()))
continue;
if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
return true;
}
return false;
}
int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
static const unsigned OpTable[] = {
AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
AMDGPU::OpName::src2
};
assert (SrcNum < 3);
return getOperandIdx(Opcode, OpTable[SrcNum]);
}
#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
{AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
{AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
{AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
{AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
{AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
{AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
};
for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
return getOperandIdx(Opcode, SrcSelTable[i][1]);
}
}
return -1;
}
#undef SRC_SEL_ROWS
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
if (MI->getOpcode() == AMDGPU::DOT_4) {
static const unsigned OpTable[8][2] = {
{AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
{AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
{AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
{AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
{AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
{AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
};
for (unsigned j = 0; j < 8; j++) {
MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
OpTable[j][0]));
unsigned Reg = MO.getReg();
if (Reg == AMDGPU::ALU_CONST) {
unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
OpTable[j][1])).getImm();
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
continue;
}
}
return Result;
}
static const unsigned OpTable[3][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
};
for (unsigned j = 0; j < 3; j++) {
int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
if (SrcIdx < 0)
break;
MachineOperand &MO = MI->getOperand(SrcIdx);
unsigned Reg = MI->getOperand(SrcIdx).getReg();
if (Reg == AMDGPU::ALU_CONST) {
unsigned Sel = MI->getOperand(
getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
continue;
}
if (Reg == AMDGPU::ALU_LITERAL_X) {
unsigned Imm = MI->getOperand(
getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
continue;
}
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
}
return Result;
}
std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
const DenseMap<unsigned, unsigned> &PV,
unsigned &ConstCount) const {
ConstCount = 0;
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
const std::pair<int, unsigned> DummyPair(-1, 0);
std::vector<std::pair<int, unsigned> > Result;
unsigned i = 0;
for (unsigned n = Srcs.size(); i < n; ++i) {
unsigned Reg = Srcs[i].first->getReg();
unsigned Index = RI.getEncodingValue(Reg) & 0xff;
if (Reg == AMDGPU::OQAP) {
Result.push_back(std::pair<int, unsigned>(Index, 0));
}
if (PV.find(Reg) != PV.end()) {
// 255 is used to tell it's a PS/PV reg.
Result.push_back(std::pair<int, unsigned>(255, 0));
continue;
}
if (Index > 127) {
ConstCount++;
Result.push_back(DummyPair);
continue;
}
unsigned Chan = RI.getHWRegChan(Reg);
Result.push_back(std::pair<int, unsigned>(Index, Chan));
}
for (; i < 3; ++i)
Result.push_back(DummyPair);
return Result;
}
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
R600InstrInfo::BankSwizzle Swz) {
if (Src[0] == Src[1])
Src[1].first = -1;
switch (Swz) {
case R600InstrInfo::ALU_VEC_012_SCL_210:
break;
case R600InstrInfo::ALU_VEC_021_SCL_122:
std::swap(Src[1], Src[2]);
break;
case R600InstrInfo::ALU_VEC_102_SCL_221:
std::swap(Src[0], Src[1]);
break;
case R600InstrInfo::ALU_VEC_120_SCL_212:
std::swap(Src[0], Src[1]);
std::swap(Src[0], Src[2]);
break;
case R600InstrInfo::ALU_VEC_201:
std::swap(Src[0], Src[2]);
std::swap(Src[0], Src[1]);
break;
case R600InstrInfo::ALU_VEC_210:
std::swap(Src[0], Src[2]);
break;
}
return Src;
}
static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
switch (Swz) {
case R600InstrInfo::ALU_VEC_012_SCL_210: {
unsigned Cycles[3] = { 2, 1, 0};
return Cycles[Op];
}
case R600InstrInfo::ALU_VEC_021_SCL_122: {
unsigned Cycles[3] = { 1, 2, 2};
return Cycles[Op];
}
case R600InstrInfo::ALU_VEC_120_SCL_212: {
unsigned Cycles[3] = { 2, 1, 2};
return Cycles[Op];
}
case R600InstrInfo::ALU_VEC_102_SCL_221: {
unsigned Cycles[3] = { 2, 2, 1};
return Cycles[Op];
}
default:
llvm_unreachable("Wrong Swizzle for Trans Slot");
return 0;
}
}
/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// into the same instruction group while meeting read port limitations, given
/// the swizzle sequence Swz.
unsigned R600InstrInfo::isLegalUpTo(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
int Vector[4][3];
memset(Vector, -1, sizeof(Vector));
for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
const std::vector<std::pair<int, unsigned> > &Srcs =
Swizzle(IGSrcs[i], Swz[i]);
for (unsigned j = 0; j < 3; j++) {
const std::pair<int, unsigned> &Src = Srcs[j];
if (Src.first < 0 || Src.first == 255)
continue;
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
// The value from output queue A (denoted by register OQAP) can
// only be fetched during the first cycle.
return false;
}
// OQAP does not count towards the normal read port restrictions
continue;
}
if (Vector[Src.second][j] < 0)
Vector[Src.second][j] = Src.first;
if (Vector[Src.second][j] != Src.first)
return i;
}
}
// Now check Trans Alu
for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransSrcs[i];
unsigned Cycle = getTransSwizzle(TransSwz, i);
if (Src.first < 0)
continue;
if (Src.first == 255)
continue;
if (Vector[Src.second][Cycle] < 0)
Vector[Src.second][Cycle] = Src.first;
if (Vector[Src.second][Cycle] != Src.first)
return IGSrcs.size() - 1;
}
return IGSrcs.size();
}
/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic order) swizzle sequence, assuming that all swizzles after
/// Idx can be skipped.
static bool
NextPossibleSolution(
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
unsigned Idx) {
assert(Idx < SwzCandidate.size());
int ResetIdx = Idx;
while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
ResetIdx --;
for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
}
if (ResetIdx == -1)
return false;
int NextSwizzle = SwzCandidate[ResetIdx] + 1;
SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
return true;
}
/// Enumerates all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
unsigned ValidUpTo = 0;
do {
ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
if (ValidUpTo == IGSrcs.size())
return true;
} while (NextPossibleSolution(SwzCandidate, ValidUpTo));
return false;
}
/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also
/// read a constant, and can't read a GPR at cycle 1 if they read two constants.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
const std::vector<std::pair<int, unsigned> > &TransOps,
unsigned ConstCount) {
// TransALU can't read 3 constants
if (ConstCount > 2)
return false;
for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransOps[i];
unsigned Cycle = getTransSwizzle(TransSwz, i);
if (Src.first < 0)
continue;
if (ConstCount > 0 && Cycle == 0)
return false;
if (ConstCount > 1 && Cycle == 1)
return false;
}
return true;
}
bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
const DenseMap<unsigned, unsigned> &PV,
std::vector<BankSwizzle> &ValidSwizzle,
bool isLastAluTrans)
const {
// TODO: Support shared src0/src1 operands.
std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
ValidSwizzle.clear();
unsigned ConstCount;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
unsigned Op = getOperandIdx(IG[i]->getOpcode(),
AMDGPU::OpName::bank_swizzle);
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
std::vector<std::pair<int, unsigned> > TransOps;
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
TransOps = IGSrcs.back();
IGSrcs.pop_back();
ValidSwizzle.pop_back();
static const R600InstrInfo::BankSwizzle TransSwz[] = {
ALU_VEC_012_SCL_210,
ALU_VEC_021_SCL_122,
ALU_VEC_120_SCL_212,
ALU_VEC_102_SCL_221
};
for (unsigned i = 0; i < 4; i++) {
TransBS = TransSwz[i];
if (!isConstCompatible(TransBS, TransOps, ConstCount))
continue;
bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
TransBS);
if (Result) {
ValidSwizzle.push_back(TransBS);
return true;
}
}
return false;
}
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
assert (Consts.size() <= 12 && "Too many operands in instructions group");
unsigned Pair1 = 0, Pair2 = 0;
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
unsigned ReadConstHalf = Consts[i] & 2;
unsigned ReadConstIndex = Consts[i] & (~3);
unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
if (!Pair1) {
Pair1 = ReadHalfConst;
continue;
}
if (Pair1 == ReadHalfConst)
continue;
if (!Pair2) {
Pair2 = ReadHalfConst;
continue;
}
if (Pair2 != ReadHalfConst)
return false;
}
return true;
}
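// Editor's note (illustrative, not upstream): an instruction group appears
// to be able to address at most two distinct 64-bit constant pairs; the loop
// above reduces each constant read to its pair address (index plus the high
// channel bit) and rejects any third distinct pair.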
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
const {
std::vector<unsigned> Consts;
SmallSet<int64_t, 4> Literals;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
MachineInstr *MI = MIs[i];
if (!isALUInstr(MI->getOpcode()))
continue;
const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
getSrcs(MI);
for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
std::pair<MachineOperand *, unsigned> Src = Srcs[j];
if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
Literals.insert(Src.second);
if (Literals.size() > 4)
return false;
if (Src.first->getReg() == AMDGPU::ALU_CONST)
Consts.push_back(Src.second);
if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
}
}
}
return fitsConstReadLimitations(Consts);
}
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}
static bool
isPredicateSetter(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::PRED_X:
return true;
default:
return false;
}
}
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
while (I != MBB.begin()) {
--I;
MachineInstr *MI = I;
if (isPredicateSetter(MI->getOpcode()))
return MI;
}
return NULL;
}
static
bool isJump(unsigned Opcode) {
return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}
static bool isBranch(unsigned Opcode) {
return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
Opcode == AMDGPU::BRANCH_COND_f32;
}
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// Most of the following comes from the ARM implementation of AnalyzeBranch
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return false;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return false;
--I;
}
// AMDGPU::BRANCH* instructions are only available after isel and are not
// handled
if (isBranch(I->getOpcode()))
return true;
if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
return false;
}
- // Get the last instruction in the block.
+ // Remove successive JUMP
+ while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) {
+ MachineBasicBlock::iterator PriorI = llvm::prior(I);
+ if (AllowModify)
+ I->removeFromParent();
+ I = PriorI;
+ }
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() ||
!isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
if (LastOpc == AMDGPU::JUMP) {
TBB = LastInst->getOperand(0).getMBB();
return false;
} else if (LastOpc == AMDGPU::JUMP_COND) {
MachineInstr *predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = I;
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If the block ends with a B and a Bcc, handle it.
if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
MachineInstr *predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = SecondLastInst->getOperand(0).getMBB();
FBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
// Otherwise, can't handle this.
return true;
}
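// Recap of the terminator shapes accepted above (a sketch, not from the
// original source):
//   JUMP %bb                           -> TBB set, Cond empty
//   PRED_X ...; JUMP_COND %bb          -> TBB set, 3-entry Cond
//   PRED_X ...; JUMP_COND %a; JUMP %b  -> TBB = %a, FBB = %b, 3-entry Cond
// Cond carries two operands of the predicate setter plus a PRED_SEL_ONE
// register operand; any other shape returns true ("cannot analyze").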
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
const MachineInstr *MI = op.getParent();
switch (MI->getDesc().OpInfo->RegClass) {
default: // FIXME: fallthrough??
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
};
}
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
It != E; ++It) {
if (It->getOpcode() == AMDGPU::CF_ALU ||
It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
return llvm::prior(It.base());
}
return MBB.end();
}
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
if (FBB == 0) {
if (Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
assert(PredSet && "No previous predicate !");
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 1;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 1;
}
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
assert(PredSet && "No previous predicate !");
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 2;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 2;
}
}
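// Informational note (not from the original source): inserting a
// conditional branch retags the enclosing clause from CF_ALU to
// CF_ALU_PUSH_BEFORE so the hardware pushes its predication state before
// the jump; RemoveBranch below performs the inverse retagging.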
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
// Note: we leave PRED* instructions there.
// They may be needed when predicating instructions.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) {
return 0;
}
--I;
switch (I->getOpcode()) {
default:
return 0;
case AMDGPU::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
I->eraseFromParent();
break;
}
I = MBB.end();
if (I == MBB.begin()) {
return 1;
}
--I;
switch (I->getOpcode()) {
// FIXME: only one case??
default:
return 1;
case AMDGPU::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
I->eraseFromParent();
break;
}
return 2;
}
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
int idx = MI->findFirstPredOperandIdx();
if (idx < 0)
return false;
unsigned Reg = MI->getOperand(idx).getReg();
switch (Reg) {
default: return false;
case AMDGPU::PRED_SEL_ONE:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PREDICATE_BIT:
return true;
}
}
bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
// XXX: KILL* instructions can be predicated, but they must be the last
// instruction in a clause, so this means any instructions after them cannot
// be predicated. Until we have proper support for instruction clauses in the
// backend, we will mark KILL* instructions as unpredicable.
if (MI->getOpcode() == AMDGPU::KILLGT) {
return false;
} else if (MI->getOpcode() == AMDGPU::CF_ALU) {
// If the clause starts in the middle of the MBB then the MBB contains
// more than one clause, and we are unable to predicate multiple clauses.
if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
return false;
// TODO: We don't support KC merging atm
if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
return false;
return true;
} else if (isVector(*MI)) {
return false;
} else {
return AMDGPUInstrInfo::isPredicable(MI);
}
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const{
return true;
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles,
unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles,
unsigned ExtraFCycles,
const BranchProbability &Probability) const {
return true;
}
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
const BranchProbability &Probability)
const {
return true;
}
bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
return false;
}
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
switch (MO.getImm()) {
case OPCODE_IS_ZERO_INT:
MO.setImm(OPCODE_IS_NOT_ZERO_INT);
break;
case OPCODE_IS_NOT_ZERO_INT:
MO.setImm(OPCODE_IS_ZERO_INT);
break;
case OPCODE_IS_ZERO:
MO.setImm(OPCODE_IS_NOT_ZERO);
break;
case OPCODE_IS_NOT_ZERO:
MO.setImm(OPCODE_IS_ZERO);
break;
default:
return true;
}
MachineOperand &MO2 = Cond[2];
switch (MO2.getReg()) {
case AMDGPU::PRED_SEL_ZERO:
MO2.setReg(AMDGPU::PRED_SEL_ONE);
break;
case AMDGPU::PRED_SEL_ONE:
MO2.setReg(AMDGPU::PRED_SEL_ZERO);
break;
default:
return true;
}
return false;
}
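// Worked example (not from the original source): a Cond whose opcode and
// select entries are {OPCODE_IS_ZERO_INT, PRED_SEL_ONE} reverses to
// {OPCODE_IS_NOT_ZERO_INT, PRED_SEL_ZERO}; returning false reports
// success, per the TargetInstrInfo convention.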
bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
return isPredicateSetter(MI->getOpcode());
}
bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const {
return false;
}
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
int PIdx = MI->findFirstPredOperandIdx();
if (MI->getOpcode() == AMDGPU::CF_ALU) {
MI->getOperand(8).setImm(0);
return true;
}
if (MI->getOpcode() == AMDGPU::DOT_4) {
MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
.setReg(Pred[2].getReg());
MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
.setReg(Pred[2].getReg());
MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
.setReg(Pred[2].getReg());
MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
.setReg(Pred[2].getReg());
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
return true;
}
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
return true;
}
return false;
}
unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
return 2;
}
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
if (PredCost)
*PredCost = 2;
return 2;
}
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
if (End == -1)
return;
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
Reserved.set(SuperReg);
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
Reserved.set(Reg);
}
}
}
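// Arithmetic sketch (not from the original source): with StackWidth == 2
// and indirect indices 2..3, the loop reserves the 128-bit super-registers
// at indices 2 and 3 plus the 32-bit T-registers numbered 4*Index + Chan,
// i.e. register-class indices 8, 9, 12 and 13.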
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const {
// XXX: Remove when we support a stack width > 2
assert(Channel == 0);
return RegIndex;
}
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const {
unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X, OffsetReg);
setImmOperand(MOVA, AMDGPU::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
AddrReg, ValueReg)
.addReg(AMDGPU::AR_X,
RegState::Implicit | RegState::Kill);
setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
return Mov;
}
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const {
unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
OffsetReg);
setImmOperand(MOVA, AMDGPU::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
ValueReg,
AddrReg)
.addReg(AMDGPU::AR_X,
RegState::Implicit | RegState::Kill);
setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
return Mov;
}
unsigned R600InstrInfo::getMaxAlusPerClause() const {
return 115;
}
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Opcode,
unsigned DstReg,
unsigned Src0Reg,
unsigned Src1Reg) const {
MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
DstReg); // $dst
if (Src1Reg) {
MIB.addImm(0) // $update_exec_mask
.addImm(0); // $update_predicate
}
MIB.addImm(1) // $write
.addImm(0) // $omod
.addImm(0) // $dst_rel
.addImm(0) // $dst_clamp
.addReg(Src0Reg) // $src0
.addImm(0) // $src0_neg
.addImm(0) // $src0_rel
.addImm(0) // $src0_abs
.addImm(-1); // $src0_sel
if (Src1Reg) {
MIB.addReg(Src1Reg) // $src1
.addImm(0) // $src1_neg
.addImm(0) // $src1_rel
.addImm(0) // $src1_abs
.addImm(-1); // $src1_sel
}
// XXX: The r600g finalizer expects this to be 1. Once we've moved the
// scheduling to the backend, we can change the default to 0.
MIB.addImm(1) // $last
.addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
.addImm(0) // $literal
.addImm(0); // $bank_swizzle
return MIB;
}
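// Usage sketch (illustrative, not from the original source; MBB, I, DstReg
// and SrcReg stand for a valid block, insertion point and registers):
//
//   MachineInstr *Mov =
//       TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, DstReg, SrcReg);
//   TII->setImmOperand(Mov, AMDGPU::OpName::clamp, 1); // set $clamp
//
// Passing Src1Reg == 0 selects the single-source layout, so the
// $update_exec_mask/$update_predicate immediates are omitted, as above.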
#define OPERAND_CASE(Label) \
case Label: { \
static const unsigned Ops[] = \
{ \
Label##_X, \
Label##_Y, \
Label##_Z, \
Label##_W \
}; \
return Ops[Slot]; \
}
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
switch (Op) {
OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
OPERAND_CASE(AMDGPU::OpName::update_pred)
OPERAND_CASE(AMDGPU::OpName::write)
OPERAND_CASE(AMDGPU::OpName::omod)
OPERAND_CASE(AMDGPU::OpName::dst_rel)
OPERAND_CASE(AMDGPU::OpName::clamp)
OPERAND_CASE(AMDGPU::OpName::src0)
OPERAND_CASE(AMDGPU::OpName::src0_neg)
OPERAND_CASE(AMDGPU::OpName::src0_rel)
OPERAND_CASE(AMDGPU::OpName::src0_abs)
OPERAND_CASE(AMDGPU::OpName::src0_sel)
OPERAND_CASE(AMDGPU::OpName::src1)
OPERAND_CASE(AMDGPU::OpName::src1_neg)
OPERAND_CASE(AMDGPU::OpName::src1_rel)
OPERAND_CASE(AMDGPU::OpName::src1_abs)
OPERAND_CASE(AMDGPU::OpName::src1_sel)
OPERAND_CASE(AMDGPU::OpName::pred_sel)
default:
llvm_unreachable("Wrong Operand");
}
}
#undef OPERAND_CASE
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
const {
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
unsigned Opcode;
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::R700)
Opcode = AMDGPU::DOT4_r600;
else
Opcode = AMDGPU::DOT4_eg;
MachineBasicBlock::iterator I = MI;
MachineOperand &Src0 = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
MachineOperand &Src1 = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
MachineInstr *MIB = buildDefaultInstruction(
MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
static const unsigned Operands[14] = {
AMDGPU::OpName::update_exec_mask,
AMDGPU::OpName::update_pred,
AMDGPU::OpName::write,
AMDGPU::OpName::omod,
AMDGPU::OpName::dst_rel,
AMDGPU::OpName::clamp,
AMDGPU::OpName::src0_neg,
AMDGPU::OpName::src0_rel,
AMDGPU::OpName::src0_abs,
AMDGPU::OpName::src0_sel,
AMDGPU::OpName::src1_neg,
AMDGPU::OpName::src1_rel,
AMDGPU::OpName::src1_abs,
AMDGPU::OpName::src1_sel,
};
MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
.setReg(MO.getReg());
for (unsigned i = 0; i < 14; i++) {
MachineOperand &MO = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
assert (MO.isImm());
setImmOperand(MIB, Operands[i], MO.getImm());
}
MIB->getOperand(20).setImm(0);
return MIB;
}
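// Informational note (not from the original source): by the operand layout
// of buildDefaultInstruction above, index 20 of the two-source form is
// $bank_swizzle, which the final setImm(0) resets for the extracted slot;
// the per-slot sources and modifiers are located through getSlotedOps.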
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,
uint64_t Imm) const {
MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
AMDGPU::ALU_LITERAL_X);
setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
return MovImm;
}
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const {
return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
return getOperandIdx(MI.getOpcode(), Op);
}
int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
return AMDGPU::getNamedOperandIdx(Opcode, Op);
}
void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
int64_t Imm) const {
int Idx = getOperandIdx(*MI, Op);
assert(Idx != -1 && "Operand not supported for this instruction.");
assert(MI->getOperand(Idx).isImm());
MI->getOperand(Idx).setImm(Imm);
}
//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
unsigned Flag) const {
unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
int FlagIndex = 0;
if (Flag != 0) {
// If we pass something other than the default value of Flag to this
// function, it means we want to set a flag on an instruction
// that uses native encoding.
assert(HAS_NATIVE_OPERANDS(TargetFlags));
bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
switch (Flag) {
case MO_FLAG_CLAMP:
FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
break;
case MO_FLAG_MASK:
FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
break;
case MO_FLAG_NOT_LAST:
case MO_FLAG_LAST:
FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
break;
case MO_FLAG_NEG:
switch (SrcIdx) {
case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
}
break;
case MO_FLAG_ABS:
assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
"instructions.");
(void)IsOP3;
switch (SrcIdx) {
case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
}
break;
default:
FlagIndex = -1;
break;
}
assert(FlagIndex != -1 && "Flag not supported for this instruction");
} else {
FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
assert(FlagIndex != 0 &&
"Instruction flags not supported for this instruction");
}
MachineOperand &FlagOp = MI->getOperand(FlagIndex);
assert(FlagOp.isImm());
return FlagOp;
}
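// Informational note (not from the original source): for instructions
// without native operands, every modifier lives in the single flag
// immediate located by GET_FLAG_OPERAND_IDX, packed per source operand as
// Flag << (NUM_MO_FLAGS * Operand); addFlag and clearFlag below OR in or
// mask out exactly that bit position.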
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const {
unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
if (Flag == 0) {
return;
}
if (HAS_NATIVE_OPERANDS(TargetFlags)) {
MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
if (Flag == MO_FLAG_NOT_LAST) {
clearFlag(MI, Operand, MO_FLAG_LAST);
} else if (Flag == MO_FLAG_MASK) {
clearFlag(MI, Operand, Flag);
} else {
FlagOp.setImm(1);
}
} else {
MachineOperand &FlagOp = getFlagOp(MI, Operand);
FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}
}
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const {
unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
if (HAS_NATIVE_OPERANDS(TargetFlags)) {
MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
FlagOp.setImm(0);
} else {
MachineOperand &FlagOp = getFlagOp(MI);
unsigned InstFlags = FlagOp.getImm();
InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
FlagOp.setImm(InstFlags);
}
}
Index: head/contrib/llvm/lib/Target/R600/R600Instructions.td
===================================================================
--- head/contrib/llvm/lib/Target/R600/R600Instructions.td (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/R600Instructions.td (revision 265925)
@@ -1,2396 +1,2396 @@
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//
include "R600Intrinsics.td"
include "R600InstrFormats.td"
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
InstR600 <outs, ins, asm, pattern, NullALU> {
let Namespace = "AMDGPU";
}
def MEMxi : Operand<iPTR> {
let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
let PrintMethod = "printMemOperand";
}
def MEMrr : Operand<iPTR> {
let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}
// Operands for non-registers
class InstFlag<string PM = "printOperand", int Default = 0>
: OperandWithDefaultOps <i32, (ops (i32 Default))> {
let PrintMethod = PM;
}
// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
let PrintMethod = "printSel";
}
def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
let PrintMethod = "printBankSwizzle";
}
def LITERAL : InstFlag<"printLiteral">;
def WRITE : InstFlag <"printWrite", 1>;
def OMOD : InstFlag <"printOMOD">;
def REL : InstFlag <"printRel">;
def CLAMP : InstFlag <"printClamp">;
def NEG : InstFlag <"printNeg">;
def ABS : InstFlag <"printAbs">;
def UEM : InstFlag <"printUpdateExecMask">;
def UP : InstFlag <"printUpdatePred">;
// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST : InstFlag<"printLast", 1>;
def RSel : Operand<i32> {
let PrintMethod = "printRSel";
}
def CT: Operand<i32> {
let PrintMethod = "printCT";
}
def FRAMEri : Operand<iPTR> {
let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
(ops PRED_SEL_OFF)>;
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
InstR600 <(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
BANK_SWIZZLE:$bank_swizzle),
!strconcat(" ", opName,
"$clamp $last $dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$pred_sel $bank_swizzle"),
pattern,
itin>,
R600ALU_Word0,
R600ALU_Word1_OP2 <inst> {
let src1 = 0;
let src1_rel = 0;
let src1_neg = 0;
let src1_abs = 0;
let update_exec_mask = 0;
let update_pred = 0;
let HasNativeOperands = 1;
let Op1 = 1;
let ALUInst = 1;
let DisableEncoding = "$literal";
let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
InstrItinClass itin = AnyALU> :
R600_1OP <inst, opName,
[(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
>;
// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
InstR600 <(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
BANK_SWIZZLE:$bank_swizzle),
!strconcat(" ", opName,
"$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
"$pred_sel $bank_swizzle"),
pattern,
itin>,
R600ALU_Word0,
R600ALU_Word1_OP2 <inst> {
let HasNativeOperands = 1;
let Op2 = 1;
let ALUInst = 1;
let DisableEncoding = "$literal";
let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
InstrItinClass itim = AnyALU> :
R600_2OP <inst, opName,
[(set R600_Reg32:$dst, (node R600_Reg32:$src0,
R600_Reg32:$src1))]
>;
// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
InstR600 <(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
BANK_SWIZZLE:$bank_swizzle),
!strconcat(" ", opName, "$clamp $last $dst$dst_rel, "
"$src0_neg$src0$src0_rel, "
"$src1_neg$src1$src1_rel, "
"$src2_neg$src2$src2_rel, "
"$pred_sel"
"$bank_swizzle"),
pattern,
itin>,
R600ALU_Word0,
R600ALU_Word1_OP3<inst>{
let HasNativeOperands = 1;
let DisableEncoding = "$literal";
let Op3 = 1;
let UseNamedOperandTable = 1;
let ALUInst = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
InstrItinClass itin = VecALU> :
InstR600 <(outs R600_Reg32:$dst),
ins,
asm,
pattern,
itin>;
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
def TEX_SHADOW : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
}]
>;
def TEX_RECT : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 5;
}]
>;
def TEX_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 9 || TType == 10 || TType == 16;
}]
>;
def TEX_SHADOW_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 11 || TType == 12 || TType == 17;
}]
>;
def TEX_MSAA : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 14;
}]
>;
def TEX_ARRAY_MSAA : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 15;
}]
>;
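// Recap of the texture-type codes matched by the PatLeafs above
// (informational, not from the original file):
//   5            TEX_RECT
//   6-8, 11-13   TEX_SHADOW
//   9, 10, 16    TEX_ARRAY
//   11, 12, 17   TEX_SHADOW_ARRAY
//   14           TEX_MSAA
//   15           TEX_ARRAY_MSAA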
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
dag outs, dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
let rat_id = ratid;
let rat_inst = ratinst;
let rim = 0;
// XXX: Have a separate instruction for non-indexed writes.
let type = 1;
let rw_rel = 0;
let elem_size = 0;
let array_size = 0;
let comp_mask = mask;
let burst_count = 0;
let vpm = 0;
let cf_inst = cfinst;
let mark = 0;
let barrier = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
let IsExport = 1;
}
class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
VTX_WORD1_GPR {
// Static fields
let DST_REL = 0;
// The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
// FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored.
// However, based on my testing, if USE_CONST_FIELDS is set then all of
// these fields need to be set to 0.
let USE_CONST_FIELDS = 0;
let NUM_FORMAT_ALL = 1;
let FORMAT_COMP_ALL = 0;
let SRF_MODE_ALL = 0;
let Inst{63-32} = Word1;
// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2> ENDIAN_SWAP = 0;
// bits<1> CONST_BUF_NO_STRIDE = 0;
// bits<1> MEGA_FETCH = 0;
// bits<1> ALT_CONST = 0;
// bits<2> BUFFER_INDEX_MODE = 0;
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter)
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82} = CONST_BUF_NO_STRIDE;
// Inst{83} = MEGA_FETCH;
// Inst{84} = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
let VTXInst = 1;
}
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
>;
def load_param : LoadParamFrag<load>;
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;
def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
def isEG : Predicate<
"Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
"Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
"!Subtarget.hasCaymanISA()">;
def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
def isEGorCayman : Predicate<"Subtarget.getGeneration() == "
"AMDGPUSubtarget::EVERGREEN"
"|| Subtarget.getGeneration() =="
"AMDGPUSubtarget::NORTHERN_ISLANDS">;
def isR600toCayman : Predicate<
"Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//
def INTERP_PAIR_XY : AMDGPUShaderInst <
(outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
(ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
"INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
[]>;
def INTERP_PAIR_ZW : AMDGPUShaderInst <
(outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
(ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
"INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
[]>;
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPVariadic]
>;
def DOT4 : SDNode<"AMDGPUISD::DOT4",
SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
[]
>;
def COS_HW : SDNode<"AMDGPUISD::COS_HW",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
>;
def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
>;
def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
(i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
(i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
(i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
(i32 imm:$DST_SEL_W),
(i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
(i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
(i32 imm:$COORD_TYPE_W)),
(inst R600_Reg128:$SRC_GPR,
imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
imm:$offsetx, imm:$offsety, imm:$offsetz,
imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
imm:$DST_SEL_W,
imm:$RESOURCE_ID, imm:$SAMPLER_ID,
imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
imm:$COORD_TYPE_W)>;
}
//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//
def INTERP_VEC_LOAD : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src0),
"INTERP_LOAD $src0 : $dst",
[(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
}
def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
let bank_swizzle = 5;
}
def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
//===----------------------------------------------------------------------===//
// Export Instructions
//===----------------------------------------------------------------------===//
def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
[SDNPHasChain, SDNPSideEffect]>;
class ExportWord0 {
field bits<32> Word0;
bits<13> arraybase;
bits<2> type;
bits<7> gpr;
bits<2> elem_size;
let Word0{12-0} = arraybase;
let Word0{14-13} = type;
let Word0{21-15} = gpr;
let Word0{22} = 0; // RW_REL
let Word0{29-23} = 0; // INDEX_GPR
let Word0{31-30} = elem_size;
}
class ExportSwzWord1 {
field bits<32> Word1;
bits<3> sw_x;
bits<3> sw_y;
bits<3> sw_z;
bits<3> sw_w;
bits<1> eop;
bits<8> inst;
let Word1{2-0} = sw_x;
let Word1{5-3} = sw_y;
let Word1{8-6} = sw_z;
let Word1{11-9} = sw_w;
}
class ExportBufWord1 {
field bits<32> Word1;
bits<12> arraySize;
bits<4> compMask;
bits<1> eop;
bits<8> inst;
let Word1{11-0} = arraySize;
let Word1{15-12} = compMask;
}
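// Informational note (not from the original file): Word0 packs
// arraybase{12-0}, type{14-13}, gpr{21-15} and elem_size{31-30}; the two
// Word1 variants leave the eop and inst bit positions to the concrete
// R600_ExportSwz/R600_ExportBuf defs later in this file (bits 21 and
// 30-23).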
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_dummy (i32 imm:$type)),
(ExportInst
(v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_dummy 1),
(ExportInst
(v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
(i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
(ExportInst R600_Reg128:$src, imm:$type, imm:$base,
imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
>;
}
multiclass SteamOutputExportPattern<Instruction ExportInst,
bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
(ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf0inst, 0)>;
// Stream1
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
(ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
(ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
(ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;
}
// Export instructions should not be duplicated by the TailDuplication pass
// (which assumes that duplicable instructions are affected by the exec mask).
let usesCustomInserter = 1, isNotDuplicable = 1 in {
class ExportSwzInst : InstR600ISA<(
outs),
(ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
i32imm:$eop),
!strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
[]>, ExportWord0, ExportSwzWord1 {
let elem_size = 3;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
let IsExport = 1;
}
} // End usesCustomInserter = 1, isNotDuplicable = 1
class ExportBufInst : InstR600ISA<(
outs),
(ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
!strconcat("EXPORT", " $gpr"),
[]>, ExportWord0, ExportBufWord1 {
let elem_size = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
let IsExport = 1;
}
//===----------------------------------------------------------------------===//
// Control Flow Instructions
//===----------------------------------------------------------------------===//
def KCACHE : InstFlag<"printKCache">;
class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
i32imm:$COUNT, i32imm:$Enabled),
!strconcat(OpName, " $COUNT, @$ADDR, "
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
field bits<64> Inst;
let CF_INST = inst;
let ALT_CONST = 0;
let WHOLE_QUAD_MODE = 0;
let BARRIER = 1;
let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
class CF_WORD0_R600 {
field bits<32> Word0;
bits<32> ADDR;
let Word0 = ADDR;
}
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
bits<4> CNT;
let CF_INST = inst;
let BARRIER = 1;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
let COUNT = CNT{2-0};
let CALL_COUNT = 0;
let COUNT_3 = CNT{3};
let END_OF_PROGRAM = 0;
let WHOLE_QUAD_MODE = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
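// Informational note (not from the original file): in the R600
// control-flow encoding the 4-bit clause count is not one contiguous
// field, hence the split of CNT above into COUNT (CNT{2-0}) and
// COUNT_3 (CNT{3}).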
class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
field bits<64> Inst;
let CF_INST = inst;
let BARRIER = 1;
let JUMPTABLE_SEL = 0;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
let END_OF_PROGRAM = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
field bits<8> Inst;
bits<8> num;
let Inst = num;
}
def ALU_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
field bits<8> Inst;
bits<8> num;
let Inst = num;
}
def LITERALS : AMDGPUInst <(outs),
(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
field bits<64> Inst;
bits<32> literal1;
bits<32> literal2;
let Inst{31-0} = literal1;
let Inst{63-32} = literal2;
}
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
field bits<64> Inst;
}
let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
//===----------------------------------------------------------------------===//
def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
0x08, "SETE",
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
>;
def SGT : R600_2OP <
0x09, "SETGT",
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
>;
def SGE : R600_2OP <
0xA, "SETGE",
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
>;
def SNE : R600_2OP <
0xB, "SETNE",
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
>;
def SETE_DX10 : R600_2OP <
0xC, "SETE_DX10",
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
>;
def SETGT_DX10 : R600_2OP <
0xD, "SETGT_DX10",
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
>;
def SETGE_DX10 : R600_2OP <
0xE, "SETGE_DX10",
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>;
def SETNE_DX10 : R600_2OP <
0xF, "SETNE_DX10",
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
def MOV : R600_1OP <0x19, "MOV", []>;
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
(outs R600_Reg32:$dst),
(ins immType:$imm),
"",
[]
>;
} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
(imm:$val),
(MOV_IMM_I32 imm:$val)
>;
def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
(fpimm:$val),
(MOV_IMM_F32 fpimm:$val)
>;
def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
let hasSideEffects = 1 in {
def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
} // end hasSideEffects
def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
def SETE_INT : R600_2OP <
0x3A, "SETE_INT",
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
>;
def SETGT_INT : R600_2OP <
0x3B, "SETGT_INT",
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
>;
def SETGE_INT : R600_2OP <
0x3C, "SETGE_INT",
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
>;
def SETNE_INT : R600_2OP <
0x3D, "SETNE_INT",
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
>;
def SETGT_UINT : R600_2OP <
0x3E, "SETGT_UINT",
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
>;
def SETGE_UINT : R600_2OP <
0x3F, "SETGE_UINT",
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
>;
def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
def CNDE_INT : R600_3OP <
0x1C, "CNDE_INT",
[(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
>;
def CNDGE_INT : R600_3OP <
0x1E, "CNDGE_INT",
[(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
>;
def CNDGT_INT : R600_3OP <
0x1D, "CNDGT_INT",
[(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
>;
//===----------------------------------------------------------------------===//
// Texture instructions
//===----------------------------------------------------------------------===//
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
class R600_TEX <bits<11> inst, string opName> :
InstR600 <(outs R600_Reg128:$DST_GPR),
(ins R600_Reg128:$SRC_GPR,
RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
CT:$COORD_TYPE_W),
!strconcat(opName,
" $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
"$SRC_GPR.$srcx$srcy$srcz$srcw "
"RID:$RESOURCE_ID SID:$SAMPLER_ID "
"CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
[],
NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
let TEX_INST = inst{4-0};
let SRC_REL = 0;
let DST_REL = 0;
let LOD_BIAS = 0;
let INST_MOD = 0;
let FETCH_WHOLE_QUAD = 0;
let ALT_CONST = 0;
let SAMPLER_INDEX_MODE = 0;
let RESOURCE_INDEX_MODE = 0;
let TEXInst = 1;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
def TEX_LD : R600_TEX <0x03, "TEX_LD">;
def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
let INST_MOD = 1;
}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
defm : TexPattern<0, TEX_SAMPLE>;
defm : TexPattern<1, TEX_SAMPLE_C>;
defm : TexPattern<2, TEX_SAMPLE_L>;
defm : TexPattern<3, TEX_SAMPLE_C_L>;
defm : TexPattern<4, TEX_SAMPLE_LB>;
defm : TexPattern<5, TEX_SAMPLE_C_LB>;
defm : TexPattern<6, TEX_LD, v4i32>;
defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
defm : TexPattern<10, TEX_LDPTR, v4i32>;
//===----------------------------------------------------------------------===//
// Helper classes for common instructions
//===----------------------------------------------------------------------===//
class MUL_LIT_Common <bits<5> inst> : R600_3OP <
inst, "MUL_LIT",
[]
>;
class MULADD_Common <bits<5> inst> : R600_3OP <
inst, "MULADD",
[]
>;
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>;
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
>;
class CNDGT_Common <bits<5> inst> : R600_3OP <
inst, "CNDGT",
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
> {
let Itinerary = VecALU;
}
class CNDGE_Common <bits<5> inst> : R600_3OP <
inst, "CNDGE",
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
> {
let Itinerary = VecALU;
}
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
// Slot X
UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
R600_Pred:$pred_sel_X,
// Slot Y
UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
R600_Pred:$pred_sel_Y,
// Slot Z
UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
R600_Pred:$pred_sel_Z,
// Slot W
UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
R600_Pred:$pred_sel_W,
LITERAL:$literal0, LITERAL:$literal1),
"",
pattern,
AnyALU> {
let UseNamedOperandTable = 1;
}
}
def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;
class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>;
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
multiclass CUBE_Common <bits<11> inst> {
def _pseudo : InstR600 <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0),
"CUBE $dst $src0",
[(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
VecALU
> {
let isPseudo = 1;
let UseNamedOperandTable = 1;
}
def _real : R600_2OP <inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "EXP_IEEE", fexp2
> {
let Itinerary = TransALU;
}
class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_INT", fp_to_sint
> {
let Itinerary = TransALU;
}
class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "INT_TO_FLT", sint_to_fp
> {
let Itinerary = TransALU;
}
class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_UINT", fp_to_uint
> {
let Itinerary = TransALU;
}
class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "UINT_TO_FLT", uint_to_fp
> {
let Itinerary = TransALU;
}
class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
inst, "LOG_CLAMPED", []
>;
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "LOG_IEEE", flog2
> {
let Itinerary = TransALU;
}
class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI_INT", mulhs
> {
let Itinerary = TransALU;
}
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI", mulhu
> {
let Itinerary = TransALU;
}
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULLO_INT", mul
> {
let Itinerary = TransALU;
}
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
let Itinerary = TransALU;
}
class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_CLAMPED", []
> {
let Itinerary = TransALU;
}
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
> {
let Itinerary = TransALU;
}
class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIP_UINT", AMDGPUurecip
> {
let Itinerary = TransALU;
}
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
> {
let Itinerary = TransALU;
}
class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIPSQRT_IEEE", []
> {
let Itinerary = TransALU;
}
class SIN_Common <bits<11> inst> : R600_1OP <
inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
let Trig = 1;
let Itinerary = TransALU;
}
class COS_Common <bits<11> inst> : R600_1OP <
inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
let Trig = 1;
let Itinerary = TransALU;
}
def CLAMP_R600 : CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;
//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
(int_AMDGPU_div f32:$src0, f32:$src1),
(MUL_IEEE $src0, (recip_ieee $src1))
>;
def : Pat<
(fdiv f32:$src0, f32:$src1),
(MUL_IEEE $src0, (recip_ieee $src1))
>;
}
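// Informational note (not from the original file): both patterns expand a
// divide into reciprocal-then-multiply, src0 / src1 ~= src0 *
// RECIP_IEEE(src1), the usual fast approximation rather than a correctly
// rounded IEEE division.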
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
: Pat <
(int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;
// FROUND pattern
class FROUNDPat<Instruction CNDGE> : Pat <
(AMDGPUround f32:$x),
(CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
>;
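// Informational note (not from the original file): the pattern evaluates
// FRACT(x) - 0.5 and, via CNDGE, selects CEIL(x) when that value is >= 0
// and FLOOR(x) otherwise; e.g. x = 2.5 gives 0.0 and rounds to CEIL = 3.0,
// while x = 2.3 gives -0.2 and rounds to FLOOR = 2.0.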
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isR600] in {
def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
def MULADD_r600 : MULADD_Common<0x10>;
def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
def CNDE_r600 : CNDE_Common<0x18>;
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
defm CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
def SIN_r600 : SIN_Common<0x6E>;
def COS_r600 : COS_Common<0x6F>;
def ASHR_r600 : ASHR_Common<0x70>;
def LSHR_r600 : LSHR_Common<0x71>;
def LSHL_r600 : LSHL_Common<0x72>;
def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
def : FROUNDPat <CNDGE_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
let Word1{22} = 0; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
defm : ExportPattern<R600_ExportSwz, 39>;
def R600_ExportBuf : ExportBufInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
let Word1{22} = 0; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
"TEX $CNT @$ADDR"> {
let POP_COUNT = 0;
}
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
"VTX $CNT @$ADDR"> {
let POP_COUNT = 0;
}
def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let CNT = 0;
}
def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let CNT = 0;
}
def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let CNT = 0;
}
def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let CNT = 0;
}
def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let CNT = 0;
}
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let CNT = 0;
}
def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
let ADDR = 0;
let CNT = 0;
let POP_COUNT = 0;
}
def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let CNT = 0;
}
def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
let CNT = 0;
let POP_COUNT = 0;
let ADDR = 0;
let END_OF_PROGRAM = 1;
}
}
//===----------------------------------------------------------------------===//
// R700 Only instructions
//===----------------------------------------------------------------------===//
let Predicates = [isR700] in {
def SIN_r700 : SIN_Common<0x6E>;
def COS_r700 : COS_Common<0x6F>;
}
//===----------------------------------------------------------------------===//
// Evergreen / Cayman store instructions
//===----------------------------------------------------------------------===//
let Predicates = [isEGorCayman] in {
class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
string name, list<dag> pattern>
: EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
"MEM_RAT_CACHELESS "#name, pattern>;
class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
list<dag> pattern>
: EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
"MEM_RAT "#name, pattern>;
def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
"MSKOR $rw_gpr.XW, $index_gpr",
[(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
> {
let eop = 0;
}
} // End Predicates = [isEGorCayman]
//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//
let Predicates = [isEG] in {
def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
let usesCustomInserter = 1 in {
// 32-bit store
def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
(ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
"STORE_RAW $rw_gpr, $index_gpr, $eop",
[(global_store i32:$rw_gpr, i32:$index_gpr)]
>;
// 64-bit store
def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
(ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
"STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
[(global_store v2i32:$rw_gpr, i32:$index_gpr)]
>;
//128-bit store
def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
"STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
} // End usesCustomInserter = 1
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
// Static fields
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
let BUFFER_ID = buffer_id;
let SRC_REL = 0;
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
let SRC_SEL_X = 0;
let Inst{31-0} = Word0;
}
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
(outs R600_TReg32_X:$dst_gpr), pattern> {
let MEGA_FETCH_COUNT = 1;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 1; // FMT_8
}
class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
(outs R600_TReg32_X:$dst_gpr), pattern> {
let MEGA_FETCH_COUNT = 2;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 5; // FMT_16
}
class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
(outs R600_TReg32_X:$dst_gpr), pattern> {
let MEGA_FETCH_COUNT = 4;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 0xD; // COLOR_32
// This is not really necessary, but there were some GPU hangs that appeared
// to be caused by ALU instructions in the next instruction group that wrote
// to the $src_gpr registers of the VTX_READ.
// e.g.
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
// %T2_X<def> = MOV %ZERO
// Adding this constraint prevents this from happening.
let Constraints = "$src_gpr.ptr = $dst_gpr";
}
class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
(outs R600_Reg64:$dst_gpr), pattern> {
let MEGA_FETCH_COUNT = 8;
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 7;
let DST_SEL_W = 7;
let DATA_FORMAT = 0x1D; // COLOR_32_32
}
class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
(outs R600_Reg128:$dst_gpr), pattern> {
let MEGA_FETCH_COUNT = 16;
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 2;
let DST_SEL_W = 3;
let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
// XXX: Need to force VTX_READ_128 instructions to write to the same register
// that holds its buffer address to avoid potential hangs. We can't use
// the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
// registers are different sizes.
}
//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
[(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
[(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
[(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
// 8-bit reads
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
[(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
[(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;
// 32-bit reads
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
[(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;
// 64-bit reads
def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
[(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;
// 128-bit reads
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
[(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;
} // End Predicates = [isEG]
//===----------------------------------------------------------------------===//
// Evergreen / Cayman Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isEGorCayman] in {
// BFE_UINT - bit_extract, an optimization for mask and shift
// Src0 = Input
// Src1 = Offset
// Src2 = Width
//
// bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
//
// Example Usage:
// (Offset, Width)
//
// (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
// (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
// (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
// (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
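//
// A rough scalar sketch of the same mask-and-shift computation (illustration
// only; the helper name is hypothetical):
//
//   uint32_t bfe_uint(uint32_t in, uint32_t off, uint32_t w) {
//     // Requires 0 < w and off + w <= 32 to keep both shifts well defined.
//     return (in << (32 - off - w)) >> (32 - w);
//   }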
def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
[(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
i32:$src2))],
VecALU
>;
- def : BFEPattern <BFE_UINT_eg>;
+// XXX: This pattern is broken, disabling for now. See comment in
+// AMDGPUInstructions.td for more info.
+// def : BFEPattern <BFE_UINT_eg>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
defm : BFIPatterns <BFI_INT_eg>;
def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
[(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
>;
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
def : ROTRPattern <BIT_ALIGN_INT_eg>;
def MULADD_eg : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
def CNDE_eg : CNDE_Common<0x19>;
def CNDGT_eg : CNDGT_Common<0x1A>;
def CNDGE_eg : CNDGE_Common<0x1B>;
def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
[(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
>;
def DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
let hasSideEffects = 1 in {
def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
}
def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
let Pattern = [];
let Itinerary = AnyALU;
}
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
let Pattern = [];
}
def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
def GROUP_BARRIER : InstR600 <
(outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
R600ALU_Word0,
R600ALU_Word1_OP2 <0x54> {
let dst = 0;
let dst_rel = 0;
let src0 = 0;
let src0_rel = 0;
let src0_neg = 0;
let src0_abs = 0;
let src1 = 0;
let src1_rel = 0;
let src1_neg = 0;
let src1_abs = 0;
let write = 0;
let omod = 0;
let clamp = 0;
let last = 1;
let bank_swizzle = 0;
let pred_sel = 0;
let update_exec_mask = 0;
let update_pred = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
let ALUInst = 1;
}
//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//
class R600_LDS <bits<6> op, dag outs, dag ins, string asm,
list<dag> pattern = []> :
InstR600 <outs, ins, asm, pattern, XALU>,
R600_ALU_LDS_Word0,
R600LDS_Word1 {
bits<6> offset = 0;
let lds_op = op;
let Word1{27} = offset{0};
let Word1{12} = offset{1};
let Word1{28} = offset{2};
let Word1{31} = offset{3};
let Word0{12} = offset{4};
let Word0{25} = offset{5};
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
let ALUInst = 1;
let HasNativeOperands = 1;
let UseNamedOperandTable = 1;
}
class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
lds_op,
(outs R600_Reg32:$dst),
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel,
BANK_SWIZZLE:$bank_swizzle),
" "#name#" $last OQAP, $src0$src0_rel $pred_sel",
pattern
> {
let src1 = 0;
let src1_rel = 0;
let src2 = 0;
let src2_rel = 0;
- let Defs = [OQAP];
let usesCustomInserter = 1;
let LDS_1A = 1;
let DisableEncoding = "$dst";
}
class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
string dst =""> :
R600_LDS <
lds_op, outs,
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel,
BANK_SWIZZLE:$bank_swizzle),
" "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
pattern
> {
field string BaseOp;
let src2 = 0;
let src2_rel = 0;
let LDS_1A1D = 1;
}
class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
R600_LDS_1A1D <lds_op, (outs), name, pattern> {
let BaseOp = name;
}
class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
let BaseOp = name;
let usesCustomInserter = 1;
let DisableEncoding = "$dst";
- let Defs = [OQAP];
}
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
R600_LDS <
lds_op,
(outs),
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
" "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
pattern> {
let LDS_1A2D = 1;
}
def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
[(truncstorei8_local i32:$src1, i32:$src0)]
>;
def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
[(truncstorei16_local i32:$src1, i32:$src0)]
>;
def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
[(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
>;
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
[(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
[(set i32:$dst, (sextloadi8_local i32:$src0))]
>;
def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
[(set i32:$dst, (az_extloadi8_local i32:$src0))]
>;
def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
[(set i32:$dst, (sextloadi16_local i32:$src0))]
>;
def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
[(set i32:$dst, (az_extloadi16_local i32:$src0))]
>;
// TRUNC is used for the FLT_TO_INT instructions to work around a
// perceived problem where the rounding modes are applied differently
// depending on the instruction and the slot they are in.
// See:
// https://bugs.freedesktop.org/show_bug.cgi?id=50232
// Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
//
// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
// which do not need to be truncated since the fp values are 0.0f or 1.0f.
// We should look into handling these cases separately.
def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
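// For instance, if FLT_TO_INT were to round -1.5f to nearest it could yield
// -2, while C-style conversion requires -1; (TRUNC -1.5f) produces -1.0f,
// which FLT_TO_INT then converts exactly. (Illustrative values only.)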
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
def : FROUNDPat <CNDGE_eg>;
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 0; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
let Word1{30} = 0; // MARK
let Word1{31} = 1; // BARRIER
}
defm : ExportPattern<EG_ExportSwz, 83>;
def EG_ExportBuf : ExportBufInst {
let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 0; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
let Word1{30} = 0; // MARK
let Word1{31} = 1; // BARRIER
}
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
let COUNT = 0;
let POP_COUNT = 0;
let ADDR = 0;
let END_OF_PROGRAM = 1;
}
} // End Predicates = [isEGorCayman]
//===----------------------------------------------------------------------===//
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//
defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
//===----------------------------------------------------------------------===//
// Cayman Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isCayman] in {
def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
[(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU
>;
def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
[(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU
>;
let isVector = 1 in {
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
// RECIP_UINT emulation for Cayman
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip i32:$src0),
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
(MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;
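// In scalar terms the pattern above computes roughly the following
// (illustration only; the function name is hypothetical):
//
//   uint32_t urecip(uint32_t x) {
//     return (uint32_t)((1.0f / (float)x) * 4294967296.0f); // * 2^32
//   }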
def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
let ADDR = 0;
let POP_COUNT = 0;
let COUNT = 0;
}
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
CF_MEM_RAT_CACHELESS <0x14, 0, mask,
(ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
"STORE_DWORD $rw_gpr, $index_gpr",
[(global_store vt:$rw_gpr, i32:$index_gpr)]> {
let eop = 0; // This bit is not used on Cayman.
}
def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
// Static fields
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
let BUFFER_ID = buffer_id;
let SRC_REL = 0;
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
let SRC_SEL_X = 0;
let SRC_SEL_Y = 0;
let STRUCTURED_READ = 0;
let LDS_REQ = 0;
let COALESCED_READ = 0;
let Inst{31-0} = Word0;
}
class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
(outs R600_TReg32_X:$dst_gpr), pattern> {
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 1; // FMT_8
}
class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
(outs R600_TReg32_X:$dst_gpr), pattern> {
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 5; // FMT_16
}
class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
(outs R600_TReg32_X:$dst_gpr), pattern> {
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 0xD; // COLOR_32
// This is not really necessary, but there were some GPU hangs that appeared
// to be caused by ALU instructions in the next instruction group that wrote
// to the $src_gpr registers of the VTX_READ.
// e.g.
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
// %T2_X<def> = MOV %ZERO
// Adding this constraint prevents this from happening.
let Constraints = "$src_gpr.ptr = $dst_gpr";
}
class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id,
(outs R600_Reg64:$dst_gpr), pattern> {
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 7;
let DST_SEL_W = 7;
let DATA_FORMAT = 0x1D; // COLOR_32_32
}
class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
: VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
(outs R600_Reg128:$dst_gpr), pattern> {
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 2;
let DST_SEL_W = 3;
let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
// XXX: Need to force VTX_READ_128 instructions to write to the same register
// that holds its buffer address to avoid potential hangs. We can't use
// the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
// registers are different sizes.
}
//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
[(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
[(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
[(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
// 8-bit reads
def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
[(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;
def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1,
[(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;
// 32-bit reads
def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
[(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;
// 64-bit reads
def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1,
[(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;
// 128-bit reads
def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
[(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;
} // End Predicates = [isCayman]
//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//
def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
"IF_PREDICATE_SET $src", []>;
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
def PRED_X : InstR600 <
(outs R600_Predicate_Bit:$dst),
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"", [], NullALU> {
let FlagOperandIdx = 3;
}
let isTerminator = 1, isBranch = 1 in {
def JUMP_COND : InstR600 <
(outs),
(ins brtarget:$target, R600_Predicate_Bit:$p),
"JUMP $target ($p)",
[], AnyALU
>;
def JUMP : InstR600 <
(outs),
(ins brtarget:$target),
"JUMP $target",
[], AnyALU
>
{
let isPredicable = 1;
let isBarrier = 1;
}
} // End isTerminator = 1, isBranch = 1
let usesCustomInserter = 1 in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
def MASK_WRITE : AMDGPUShaderInst <
(outs),
(ins R600_Reg32:$src),
"MASK_WRITE $src",
[]
>;
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
def TXD: InstR600 <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
[(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
NullALU > {
let TEXInst = 1;
}
def TXD_SHADOW: InstR600 <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
[(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
NullALU
> {
let TEXInst = 1;
}
} // End isPseudo = 1
} // End usesCustomInserter = 1
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(IL_retflag)]>;
}
//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//
let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
def CONST_COPY : Instruction {
let OutOperandList = (outs R600_Reg32:$dst);
let InOperandList = (ins i32imm:$src);
let Pattern =
[(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
let AsmString = "CONST_COPY";
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
let Itinerary = NullALU;
}
} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
def TEX_VTX_CONSTBUF :
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
[(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
VTX_WORD1_GPR, VTX_WORD0_eg {
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
let SRC_REL = 0;
let SRC_SEL_X = 0;
let DST_REL = 0;
let USE_CONST_FIELDS = 0;
let NUM_FORMAT_ALL = 2;
let FORMAT_COMP_ALL = 1;
let SRF_MODE_ALL = 1;
let MEGA_FETCH_COUNT = 16;
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 2;
let DST_SEL_W = 3;
let DATA_FORMAT = 35;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2> ENDIAN_SWAP = 0;
// bits<1> CONST_BUF_NO_STRIDE = 0;
// bits<1> MEGA_FETCH = 0;
// bits<1> ALT_CONST = 0;
// bits<2> BUFFER_INDEX_MODE = 0;
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter)
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82} = CONST_BUF_NO_STRIDE;
// Inst{83} = MEGA_FETCH;
// Inst{84} = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
let VTXInst = 1;
}
def TEX_VTX_TEXBUF:
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
[(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
VTX_WORD1_GPR, VTX_WORD0_eg {
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
let SRC_REL = 0;
let SRC_SEL_X = 0;
let DST_REL = 0;
let USE_CONST_FIELDS = 1;
let NUM_FORMAT_ALL = 0;
let FORMAT_COMP_ALL = 0;
let SRF_MODE_ALL = 1;
let MEGA_FETCH_COUNT = 16;
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 2;
let DST_SEL_W = 3;
let DATA_FORMAT = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2> ENDIAN_SWAP = 0;
// bits<1> CONST_BUF_NO_STRIDE = 0;
// bits<1> MEGA_FETCH = 0;
// bits<1> ALT_CONST = 0;
// bits<2> BUFFER_INDEX_MODE = 0;
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter)
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82} = CONST_BUF_NO_STRIDE;
// Inst{83} = MEGA_FETCH;
// Inst{84} = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
let VTXInst = 1;
}
//===--------------------------------------------------------------------===//
// Instruction support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom inserter for branches and returns; this will eventually be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}
//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
!strconcat("SWITCH", " $src"), []>;
def CASE : ILFormat< (outs), (ins GPRI32:$src),
!strconcat("CASE", " $src"), []>;
def BREAK : ILFormat< (outs), (ins),
"BREAK", []>;
def CONTINUE : ILFormat< (outs), (ins),
"CONTINUE", []>;
def DEFAULT : ILFormat< (outs), (ins),
"DEFAULT", []>;
def ELSE : ILFormat< (outs), (ins),
"ELSE", []>;
def ENDSWITCH : ILFormat< (outs), (ins),
"ENDSWITCH", []>;
def ENDMAIN : ILFormat< (outs), (ins),
"ENDMAIN", []>;
def END : ILFormat< (outs), (ins),
"END", []>;
def ENDFUNC : ILFormat< (outs), (ins),
"ENDFUNC", []>;
def ENDIF : ILFormat< (outs), (ins),
"ENDIF", []>;
def WHILELOOP : ILFormat< (outs), (ins),
"WHILE", []>;
def ENDLOOP : ILFormat< (outs), (ins),
"ENDLOOP", []>;
def FUNC : ILFormat< (outs), (ins),
"FUNC", []>;
def RETDYN : ILFormat< (outs), (ins),
"RET_DYN", []>;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
defm IFC : BranchInstr2<"IFC">;
defm BREAKC : BranchInstr2<"BREAKC">;
defm CONTINUEC : BranchInstr2<"CONTINUEC">;
}
//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//
// CND*_INT patterns for f32 true / false values
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
(selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
(cnd $src0, $src1, $src2)
>;
def : CND_INT_f32 <CNDE_INT, SETEQ>;
def : CND_INT_f32 <CNDGT_INT, SETGT>;
def : CND_INT_f32 <CNDGE_INT, SETGE>;
// CNDGE_INT extra pattern
def : Pat <
(selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
(CNDGE_INT $src0, $src1, $src2)
>;
// KIL Patterns
def KILP : Pat <
(int_AMDGPU_kilp),
(MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;
def KIL : Pat <
(int_AMDGPU_kill f32:$src0),
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;
def : Extract_Element <f32, v4f32, 0, sub0>;
def : Extract_Element <f32, v4f32, 1, sub1>;
def : Extract_Element <f32, v4f32, 2, sub2>;
def : Extract_Element <f32, v4f32, 3, sub3>;
def : Insert_Element <f32, v4f32, 0, sub0>;
def : Insert_Element <f32, v4f32, 1, sub1>;
def : Insert_Element <f32, v4f32, 2, sub2>;
def : Insert_Element <f32, v4f32, 3, sub3>;
def : Extract_Element <i32, v4i32, 0, sub0>;
def : Extract_Element <i32, v4i32, 1, sub1>;
def : Extract_Element <i32, v4i32, 2, sub2>;
def : Extract_Element <i32, v4i32, 3, sub3>;
def : Insert_Element <i32, v4i32, 0, sub0>;
def : Insert_Element <i32, v4i32, 1, sub1>;
def : Insert_Element <i32, v4i32, 2, sub2>;
def : Insert_Element <i32, v4i32, 3, sub3>;
def : Vector4_Build <v4f32, f32>;
def : Vector4_Build <v4i32, i32>;
def : Extract_Element <f32, v2f32, 0, sub0>;
def : Extract_Element <f32, v2f32, 1, sub1>;
def : Insert_Element <f32, v2f32, 0, sub0>;
def : Insert_Element <f32, v2f32, 1, sub1>;
def : Extract_Element <i32, v2i32, 0, sub0>;
def : Extract_Element <i32, v2i32, 1, sub1>;
def : Insert_Element <i32, v2i32, 0, sub0>;
def : Insert_Element <i32, v2i32, 1, sub1>;
// bitconvert patterns
def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
def : BitConvert <v2f32, v2i32, R600_Reg64>;
def : BitConvert <v2i32, v2f32, R600_Reg64>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;
def : BitConvert <v4i32, v4f32, R600_Reg128>;
// DWORDADDR pattern
def : DwordAddrPat <i32, R600_Reg32>;
} // End isR600toCayman Predicate
def getLDSNoRetOp : InstrMapping {
let FilterClass = "R600_LDS_1A1D";
let RowFields = ["BaseOp"];
let ColFields = ["DisableEncoding"];
let KeyCol = ["$dst"];
let ValueCols = [[""""]];
}
Index: head/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp (revision 265925)
@@ -1,263 +1,263 @@
//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
/// Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
/// %vreg0 <sgpr> = SCALAR_INST
/// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %vreg2 <vgpr> = VECTOR_INST
/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
/// %vreg0 <sgpr> = SCALAR_INST
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %vreg2 <vgpr> = VECTOR_INST
/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
/// %vreg0 <sgpr> = SCALAR_INST
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %vreg2 <vgpr> = VECTOR_INST
/// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// To avoid this problem, this pass searches for PHI instructions that
/// define a <vsrc> register and constrains their definition class to
/// <vgpr> when a user of the PHI's definition register is a vector instruction.
/// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example which
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sgpr-copies"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
private:
static char ID;
const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
unsigned Reg,
unsigned SubReg) const;
const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
unsigned Reg,
unsigned SubReg) const;
bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI) const;
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI Fix SGPR copies";
}
};
} // End anonymous namespace
char SIFixSGPRCopies::ID = 0;
FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
return new SIFixSGPRCopies(tm);
}
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isReg() ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
continue;
if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
return true;
}
return false;
}
/// This function walks the use list of Reg until it finds an instruction
/// that isn't a COPY, and returns the register class of that instruction.
/// \return The register class required by the first non-COPY use.
const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
unsigned Reg,
unsigned SubReg) const {
// The Reg parameter to the function must always be defined by either a PHI
// or a COPY, therefore it cannot be a physical register.
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Reg cannot be a physical register");
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
E = MRI.use_end(); I != E; ++I) {
switch (I->getOpcode()) {
case AMDGPU::COPY:
RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
I->getOperand(0).getReg(),
I->getOperand(0).getSubReg()));
break;
}
}
return RC;
}
const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
unsigned Reg,
unsigned SubReg) const {
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
return TRI->getSubRegClass(RC, SubReg);
}
MachineInstr *Def = MRI.getVRegDef(Reg);
if (Def->getOpcode() != AMDGPU::COPY) {
return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
}
return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
Def->getOperand(1).getSubReg());
}
bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI) const {
unsigned DstReg = Copy.getOperand(0).getReg();
unsigned SrcReg = Copy.getOperand(1).getReg();
unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
DstRC == &AMDGPU::M0RegRegClass)
return false;
- SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
+ SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
MF.getTarget().getRegisterInfo());
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
MF.getTarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
DEBUG(MI.print(dbgs()));
TII->moveToVALU(MI);
}
switch (MI.getOpcode()) {
default: continue;
case AMDGPU::PHI: {
DEBUG(dbgs() << " Fixing PHI:\n");
DEBUG(MI.print(dbgs()));
for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
unsigned Reg = MI.getOperand(i).getReg();
const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
MI.getOperand(0).getSubReg());
MRI.constrainRegClass(Reg, RC);
}
unsigned Reg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
MI.getOperand(0).getSubReg());
if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
}
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
unsigned Reg = MI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
TII->moveToVALU(MI);
break;
}
}
break;
}
case AMDGPU::REG_SEQUENCE: {
if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
!hasVGPROperands(MI, TRI))
continue;
DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
DEBUG(MI.print(dbgs()));
TII->moveToVALU(MI);
break;
}
}
}
}
return false;
}
Index: head/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp (revision 265925)
@@ -1,368 +1,374 @@
//===-- SIInsertWaits.cpp - Insert wait instructions for memory ops -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Insert wait instructions for memory reads and writes.
///
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
//
//===----------------------------------------------------------------------===//
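//
// For example (an illustrative ISA sequence, not code produced by this file):
//
//   buffer_load_dword v0, ...  ; increments VM_CNT when issued
//   s_waitcnt vmcnt(0)         ; inserted here, before the first use of v0
//   v_add_i32 v1, vcc, v0, v0
//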
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
/// \brief One variable for each of the hardware counters
typedef union {
struct {
unsigned VM;
unsigned EXP;
unsigned LGKM;
} Named;
unsigned Array[3];
} Counters;
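// The union lets the pass address the three counters either by name
// (Named.VM/EXP/LGKM) or uniformly by index (Array[0..2]) in loops.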
typedef Counters RegCounters[512];
typedef std::pair<unsigned, unsigned> RegInterval;
class SIInsertWaits : public MachineFunctionPass {
private:
static char ID;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
/// \brief Constant hardware limits
static const Counters WaitCounts;
/// \brief Constant zero value
static const Counters ZeroCounts;
/// \brief Counter values we have already waited on.
Counters WaitedOn;
/// \brief Counter values for last instruction issued.
Counters LastIssued;
/// \brief Registers used by async instructions.
RegCounters UsedRegs;
/// \brief Registers defined by async instructions.
RegCounters DefinedRegs;
/// \brief Different export instruction types seen since last wait.
unsigned ExpInstrTypesSeen;
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
/// \brief Is operand relevant for async execution?
bool isOpRelevant(MachineOperand &Op);
/// \brief Get register interval an operand affects.
RegInterval getRegInterval(MachineOperand &Op);
/// \brief Handle an instruction's async components
void pushInstruction(MachineInstr &MI);
/// \brief Insert the actual wait instruction
bool insertWait(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Counts);
/// \brief Do we need def2def checks?
bool unorderedDefines(MachineInstr &MI);
/// \brief Resolve all operand dependencies to counter requirements
Counters handleOperands(MachineInstr &MI);
public:
SIInsertWaits(TargetMachine &tm) :
MachineFunctionPass(ID),
TII(0),
TRI(0),
ExpInstrTypesSeen(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI insert wait instructions";
}
};
} // End anonymous namespace
char SIInsertWaits::ID = 0;
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
return new SIInsertWaits(tm);
}
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
Counters Result;
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
// Only consider stores or EXP for EXP_CNT
Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
(MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
// LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
if (TII->isSMRD(MI.getOpcode())) {
MachineOperand &Op = MI.getOperand(0);
assert(Op.isReg() && "First LGKM operand must be a register!");
unsigned Reg = Op.getReg();
unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
Result.Named.LGKM = Size > 4 ? 2 : 1;
} else {
// DS
Result.Named.LGKM = 1;
}
} else {
Result.Named.LGKM = 0;
}
return Result;
}
bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
// Constants are always irrelevant
if (!Op.isReg())
return false;
// Defines are always relevant
if (Op.isDef())
return true;
// For exports all registers are relevant
MachineInstr &MI = *Op.getParent();
if (MI.getOpcode() == AMDGPU::EXP)
return true;
// For stores the stored value is also relevant
if (!MI.getDesc().mayStore())
return false;
for (MachineInstr::mop_iterator I = MI.operands_begin(),
E = MI.operands_end(); I != E; ++I) {
if (I->isReg() && I->isUse())
return Op.isIdenticalTo(*I);
}
return false;
}
RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return std::make_pair(0, 0);
unsigned Reg = Op.getReg();
unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
assert(Size >= 4);
RegInterval Result;
Result.first = TRI->getEncodingValue(Reg);
Result.second = Result.first + Size / 4;
return Result;
}
void SIInsertWaits::pushInstruction(MachineInstr &MI) {
// Get the hardware counter increments and sum them up
Counters Increment = getHwCounts(MI);
unsigned Sum = 0;
for (unsigned i = 0; i < 3; ++i) {
LastIssued.Array[i] += Increment.Array[i];
Sum += Increment.Array[i];
}
// If we don't increase anything then that's it
if (Sum == 0)
return;
// Remember which export instructions we have seen
if (Increment.Named.EXP) {
ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
}
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!isOpRelevant(Op))
continue;
RegInterval Interval = getRegInterval(Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
// Remember which registers we define
if (Op.isDef())
DefinedRegs[j] = LastIssued;
// and which one we are using
if (Op.isUse())
UsedRegs[j] = LastIssued;
}
}
}
bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Required) {
// End of program? No need to wait on anything
if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
return false;
// Figure out if the async instructions execute in order
bool Ordered[3];
// VM_CNT is always ordered
Ordered[0] = true;
// EXP_CNT is unordered if we have both EXP & VM-writes
Ordered[1] = ExpInstrTypesSeen == 3;
// LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
Ordered[2] = false;
// The values we are going to put into the S_WAITCNT instruction
Counters Counts = WaitCounts;
// Do we really need to wait?
bool NeedWait = false;
for (unsigned i = 0; i < 3; ++i) {
if (Required.Array[i] <= WaitedOn.Array[i])
continue;
NeedWait = true;
if (Ordered[i]) {
unsigned Value = LastIssued.Array[i] - Required.Array[i];
// Adjust the value to the real hardware possibilities
Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
} else
Counts.Array[i] = 0;
// Remember what we have waited on
WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
}
if (!NeedWait)
return false;
// Reset EXP_CNT instruction types
if (Counts.Named.EXP == 0)
ExpInstrTypesSeen = 0;
// Build the wait instruction
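// The S_WAITCNT immediate packs all three counters: VM in bits 3:0, EXP in
// bits 6:4, LGKM in bits 10:8. E.g. the "wait for nothing" limits {15, 7, 7}
// would encode as 0x77F (an illustrative decode matching the shifts below).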
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm((Counts.Named.VM & 0xF) |
((Counts.Named.EXP & 0x7) << 4) |
((Counts.Named.LGKM & 0x7) << 8));
return true;
}
/// \brief helper function for handleOperands
static void increaseCounters(Counters &Dst, const Counters &Src) {
for (unsigned i = 0; i < 3; ++i)
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
Counters Result = ZeroCounts;
+ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
+ // but we also want to wait for any other outstanding transfers before
+ // signalling other hardware blocks
+ if (MI.getOpcode() == AMDGPU::S_SENDMSG)
+ return LastIssued;
+
// For each register affected by this
// instruction increase the result sequence
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI.getOperand(i);
RegInterval Interval = getRegInterval(Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
increaseCounters(Result, DefinedRegs[j]);
}
if (Op.isUse())
increaseCounters(Result, DefinedRegs[j]);
}
}
return Result;
}
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
MRI = &MF.getRegInfo();
WaitedOn = ZeroCounts;
LastIssued = ZeroCounts;
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
Changes |= insertWait(MBB, I, handleOperands(*I));
pushInstruction(*I);
}
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
return Changes;
}
Index: head/contrib/llvm/lib/Target/R600/SIInstrInfo.td
===================================================================
--- head/contrib/llvm/lib/Target/R600/SIInstrInfo.td (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/SIInstrInfo.td (revision 265925)
@@ -1,601 +1,623 @@
//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//
// SMRD takes a 64-bit memory address and can only add a 32-bit offset
def SIadd64bit32bit : SDNode<"ISD::ADD",
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
>;
def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
[SDNPMayLoad, SDNPMemOperand]
>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
SDTypeProfile<0, 13,
[SDTCisVT<0, i128>, // rsrc(SGPR)
SDTCisVT<1, iAny>, // vdata(VGPR)
SDTCisVT<2, i32>, // num_channels(imm)
SDTCisVT<3, i32>, // vaddr(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // inst_offset(imm)
SDTCisVT<6, i32>, // dfmt(imm)
SDTCisVT<7, i32>, // nfmt(imm)
SDTCisVT<8, i32>, // offen(imm)
SDTCisVT<9, i32>, // idxen(imm)
SDTCisVT<10, i32>, // glc(imm)
SDTCisVT<11, i32>, // slc(imm)
SDTCisVT<12, i32> // tfe(imm)
]>,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
SDTCisVT<3, i32>]>
>;
class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
>;
def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
// Transformation function: extract the lower 32 bits of a 64-bit immediate
def LO32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
}]>;
def LO32f : SDNodeXForm<fpimm, [{
APInt V = N->getValueAPF().bitcastToAPInt().trunc(32);
return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32);
}]>;
// Transformation function: extract the upper 32 bits of a 64-bit immediate
def HI32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
}]>;
def HI32f : SDNodeXForm<fpimm, [{
APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32);
return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32);
}]>;
def IMM8bitDWORD : ImmLeaf <
i32, [{
return (Imm & ~0x3FC) == 0;
}], SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
N->getZExtValue() >> 2, MVT::i32);
}]>
>;
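// I.e. IMM8bitDWORD matches byte offsets that are multiples of 4 in
// [0, 1020] (0x3FC == 1020), exactly the values expressible as an 8-bit
// dword count after the >> 2 transform above.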
def as_i1imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
}]>;
def as_i8imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
}]>;
def as_i16imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
}]>;
def IMM12bit : PatLeaf <(imm),
[{return isUInt<12>(N->getZExtValue());}]
>;
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return
(*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0;
}]>;
class SGPRImm <dag frag> : PatLeaf<frag, [{
if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() <
AMDGPUSubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
return true;
}
}
return false;
}]>;
def FRAMEri64 : Operand<iPTR> {
let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
}
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
def SIOperand {
int ZERO = 0x80;
int VCC = 0x6A;
}
include "SIInstrFormats.td"
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//
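// For example, a 32-bit VOP2 add defined through these helpers yields both a
// V_ADD_F32_e32 and a V_ADD_F32_e64 variant (names shown for illustration;
// the defs themselves live in SIInstructions.td).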
//===----------------------------------------------------------------------===//
// Scalar classes
//===----------------------------------------------------------------------===//
class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
op, (outs SReg_32:$dst), (ins SSrc_32:$src0),
opName#" $dst, $src0", pattern
>;
class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
op, (outs SReg_64:$dst), (ins SSrc_64:$src0),
opName#" $dst, $src0", pattern
>;
class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
>;
class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
opName#" $dst, $src0, $src1", pattern
>;
class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
>;
class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
>;
class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
opName#" $dst, $src0, $src1", pattern
>;
class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
op, (outs SReg_32:$dst), (ins i16imm:$src0),
opName#" $dst, $src0", pattern
>;
class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
op, (outs SReg_64:$dst), (ins i16imm:$src0),
opName#" $dst, $src0", pattern
>;
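// SMRD instructions load scalar data from memory addressed by an SGPR base
// plus an offset: the _IMM variant takes an immediate offset, while the
// _SGPR variant reads the offset from an SGPR.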
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
RegisterClass dstClass> {
def _IMM : SMRD <
op, 1, (outs dstClass:$dst),
(ins baseClass:$sbase, i32imm:$offset),
asm#" $dst, $sbase, $offset", []
>;
def _SGPR : SMRD <
op, 0, (outs dstClass:$dst),
(ins baseClass:$sbase, SReg_32:$soff),
asm#" $dst, $sbase, $soff", []
>;
}
//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//
class VOP <string opName> {
string OpName = opName;
}
class VOP2_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
}
multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
string opName, list<dag> pattern> {
def _e32 : VOP1 <
op, (outs drc:$dst), (ins src:$src0),
opName#"_e32 $dst, $src0", pattern
>, VOP <opName>;
def _e64 : VOP3 <
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs drc:$dst),
(ins src:$src0,
i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
>, VOP <opName> {
let src1 = SIOperand.ZERO;
let src2 = SIOperand.ZERO;
}
}
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern, string revOp> {
def _e32 : VOP2 <
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
opName#"_e32 $dst, $src0, $src1", pattern
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _e64 : VOP3 <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs vrc:$dst),
(ins arc:$src0, arc:$src1,
i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
>, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let src2 = SIOperand.ZERO;
}
}
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
string revOp = opName>
: VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
string revOp = opName>
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName> {
+ RegisterClass src0_rc, string revOp = opName> {
def _e32 : VOP2 <
- op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
opName#"_e32 $dst, $src0, $src1", pattern
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _e64 : VOP3b <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs VReg_32:$dst),
(ins VSrc_32:$src0, VSrc_32:$src1,
i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
>, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let src2 = SIOperand.ZERO;
/* The VOP2 variant puts the carry out into VCC, while the VOP3 variant
   can write it into any SGPR. We currently don't use the carry out,
   so for now hardcode it to VCC as well. */
let sdst = SIOperand.VCC;
}
}
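// VOPC comparisons write their result to VCC implicitly in the e32
// encoding (note the _e32 variant below has no explicit outs), while the
// e64 encoding can write any SGPR pair.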
multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
string opName, ValueType vt, PatLeaf cond> {
def _e32 : VOPC <
op, (ins arc:$src0, vrc:$src1),
opName#"_e32 $dst, $src0, $src1", []
>, VOP <opName>;
def _e64 : VOP3 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs SReg_64:$dst),
(ins arc:$src0, arc:$src1,
InstFlag:$abs, InstFlag:$clamp,
InstFlag:$omod, InstFlag:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
!if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
[(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
)
>, VOP <opName> {
let src2 = SIOperand.ZERO;
}
}
multiclass VOPC_32 <bits<8> op, string opName,
ValueType vt = untyped, PatLeaf cond = COND_NULL>
: VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
multiclass VOPC_64 <bits<8> op, string opName,
ValueType vt = untyped, PatLeaf cond = COND_NULL>
: VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>, VOP <opName>;
class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
>, VOP <opName> {
let src2 = SIOperand.ZERO;
let abs = 0;
let clamp = 0;
let omod = 0;
let neg = 0;
}
class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>, VOP <opName>;
//===----------------------------------------------------------------------===//
// Vector I/O classes
//===----------------------------------------------------------------------===//
class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs regClass:$vdst),
(ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
i8imm:$offset0, i8imm:$offset1),
asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
[]> {
let mayLoad = 1;
let mayStore = 0;
}
class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
i8imm:$offset0, i8imm:$offset1),
asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
[]> {
let mayStore = 1;
let mayLoad = 0;
let vdst = 0;
}
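// A DS operation with one address and one data operand that also returns
// a value (used by the *_RTN atomics below), so it both loads and stores.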
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS <
op,
(outs rc:$vdst),
(ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0,
i8imm:$offset1),
asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]",
[]> {
let mayStore = 1;
let mayLoad = 1;
let data1 = 0;
}
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
(ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
#" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
let mayStore = 1;
let mayLoad = 0;
}
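// MUBUF loads select their addressing mode through the offen/idxen/addr64
// bits: offen treats $vaddr as a byte offset, idxen treats it as a buffer
// index, setting both passes {index, offset} in a 64-bit $vaddr, and
// addr64 uses $vaddr as a full 64-bit address.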
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
- let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
- mayLoad = 1 in {
+ let lds = 0, mayLoad = 1 in {
- let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
- def _OFFEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr),
- asm#" $vdata, $srsrc + $vaddr", []>;
- }
+ let addr64 = 0 in {
- let offen = 0, idxen = 1, addr64 = 0 in {
- def _IDXEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
- asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
- }
+ let offen = 0, idxen = 0 in {
+ def _OFFSET : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
- let offen = 0, idxen = 0, addr64 = 1 in {
- def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
- }
+ let offen = 1, idxen = 0, offset = 0 in {
+ def _OFFEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
+ i1imm:$tfe),
+ asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 0, idxen = 1 in {
+ def _IDXEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr,
+ SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+ }
+
+ let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+ def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+ asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+ }
}
}
class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
i16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
[]> {
let mayLoad = 0;
let mayStore = 1;
// Encoding
let offen = 0;
let idxen = 0;
let glc = 0;
let addr64 = 1;
let lds = 0;
let slc = 0;
let tfe = 0;
let soffset = 128; // ZERO
}
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
#" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
let mayLoad = 1;
let mayStore = 0;
}
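// Records an opcode name together with its channel count so that
// getMaskedMIMGOp can map between variants writing different numbers of
// channels.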
class MIMG_Mask <string op, int channels> {
string Op = op;
int Channels = channels;
}
class MIMG_NoSampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass src_rc> : MIMG <
op,
(outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
SReg_256:$srsrc),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc",
[]> {
let SSAMP = 0;
let mayLoad = 1;
let mayStore = 0;
let hasPostISelHook = 1;
}
multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels> {
def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_32>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
MIMG_Mask<asm#"_V4", channels>;
}
multiclass MIMG_NoSampler <bits<7> op, string asm> {
defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VReg_32, 1>;
defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
}
class MIMG_Sampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass src_rc> : MIMG <
op,
(outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
SReg_256:$srsrc, SReg_128:$ssamp),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
[]> {
let mayLoad = 1;
let mayStore = 0;
let hasPostISelHook = 1;
}
multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels> {
def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_32>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>,
MIMG_Mask<asm#"_V4", channels>;
def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>,
MIMG_Mask<asm#"_V8", channels>;
def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>,
MIMG_Mask<asm#"_V16", channels>;
}
multiclass MIMG_Sampler <bits<7> op, string asm> {
defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VReg_32, 1>;
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
}
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
// Maps an opcode in e32 form to its e64 equivalent
def getVOPe64 : InstrMapping {
let FilterClass = "VOP";
let RowFields = ["OpName"];
let ColFields = ["Size"];
let KeyCol = ["4"];
let ValueCols = [["8"]];
}
// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
let FilterClass = "VOP2_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["1"];
let ValueCols = [["0"]];
}
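// Maps a 4-channel MIMG opcode to the variant writing only 1, 2 or 3
// channels, for when the write mask enables fewer than four components.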
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
let ColFields = ["Channels"];
let KeyCol = ["4"];
let ValueCols = [["1"], ["2"], ["3"] ];
}
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "VOP2_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["0"];
let ValueCols = [["1"]];
}
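// The mappings above are emitted as lookup tables in the generated
// instruction info. A minimal sketch of querying them from backend C++
// code (assuming the AMDGPU::get* accessor names that TableGen
// conventionally emits for InstrMapping defs):
//
//   int Op64  = AMDGPU::getVOPe64(MI.getOpcode());     // e64 form
//   int RevOp = AMDGPU::getCommuteRev(MI.getOpcode()); // commuted variant
//
// Both return -1 when the opcode has no entry in the table.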
include "SIInstructions.td"
Index: head/contrib/llvm/lib/Target/R600/SIInstructions.td
===================================================================
--- head/contrib/llvm/lib/Target/R600/SIInstructions.td (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/SIInstructions.td (revision 265925)
@@ -1,2081 +1,2161 @@
//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This file was originally auto-generated from a GPU register header file,
// with all of the instruction definitions commented out. Instructions that
// are not yet supported remain commented out.
//===----------------------------------------------------------------------===//
class InterpSlots {
int P0 = 2;
int P10 = 0;
int P20 = 1;
}
def INTERP : InterpSlots;
def InterpSlot : Operand<i32> {
let PrintMethod = "printInterpSlot";
}
+def SendMsgImm : Operand<i32>;
+
def isSI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def WAIT_FLAG : InstFlag<"printWaitFlag">;
let Predicates = [isSI] in {
let neverHasSideEffects = 1 in {
let isMoveImm = 1 in {
def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
} // End isMoveImm = 1
def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
} // End neverHasSideEffects = 1
////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
} // End hasSideEffects = 1
def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
/*
This instruction is disabled for now until we can figure out how to teach
the instruction selector to correctly use the S_CMP* vs V_CMP*
instructions.
When this instruction is enabled, the code generator sometimes produces this
invalid sequence:
SCC = S_CMPK_EQ_I32 SGPR0, imm
VCC = COPY SCC
VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
def S_CMPK_EQ_I32 : SOPK <
0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
"S_CMPK_EQ_I32",
[(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
>;
*/
let isCompare = 1 in {
def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
} // End isCompare = 1
let Defs = [SCC], isCommutable = 1 in {
def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
}
//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
//def EXP : EXP_ <0x00000000, "EXP", []>;
let isCompare = 1 in {
defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_OLT>;
defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_OEQ>;
defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_OLE>;
defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_OGT>;
defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32">;
defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_OGE>;
defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", f32, COND_O>;
defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", f32, COND_UO>;
defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_OEQ>;
defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_OLE>;
defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_OGT>;
defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_OGE>;
defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", f64, COND_O>;
defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", f64, COND_UO>;
defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_SLT>;
defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_SLE>;
defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_SGT>;
defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", i64, COND_EQ>;
defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", i64, COND_SLE>;
defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", i64, COND_SGT>;
defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", i32, COND_EQ>;
defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", i32, COND_ULE>;
defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", i32, COND_UGT>;
defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", i64, COND_EQ>;
defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", i64, COND_ULE>;
defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", i64, COND_UGT>;
defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
let hasSideEffects = 1, Defs = [EXEC] in {
defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
} // End hasSideEffects = 1, Defs = [EXEC]
} // End isCompare = 1
def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
def DS_WRITE_B8 : DS_Store_Helper <0x0000001e, "DS_WRITE_B8", VReg_32>;
def DS_WRITE_B16 : DS_Store_Helper <0x0000001f, "DS_WRITE_B16", VReg_32>;
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <0x00000008, "BUFFER_LOAD_UBYTE", VReg_32>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <0x00000009, "BUFFER_LOAD_SBYTE", VReg_32>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <0x0000000a, "BUFFER_LOAD_USHORT", VReg_32>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
def BUFFER_STORE_BYTE : MUBUF_Store_Helper <
0x00000018, "BUFFER_STORE_BYTE", VReg_32
>;
def BUFFER_STORE_SHORT : MUBUF_Store_Helper <
0x0000001a, "BUFFER_STORE_SHORT", VReg_32
>;
def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
0x0000001c, "BUFFER_STORE_DWORD", VReg_32
>;
def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64
>;
def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
let mayLoad = 1 in {
// We use the SGPR_32 register class, rather than SReg_32, for 32-bit
// SMRD instructions, because SGPR_32 does not include M0, and writing
// to M0 from an SMRD instruction will hang the GPU.
defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
>;
} // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">;
//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
let neverHasSideEffects = 1, isMoveImm = 1 in {
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
} // End neverHasSideEffects = 1, isMoveImm = 1
defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
[(set i32:$dst, (fp_to_sint f64:$src0))]
>;
defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32",
[(set f64:$dst, (sint_to_fp i32:$src0))]
>;
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
[(set f32:$dst, (sint_to_fp i32:$src0))]
>;
defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
[(set f32:$dst, (uint_to_fp i32:$src0))]
>;
defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
[(set i32:$dst, (fp_to_uint f32:$src0))]
>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set i32:$dst, (fp_to_sint f32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
[(set f32:$dst, (fround f64:$src0))]
>;
defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
[(set f64:$dst, (fextend f32:$src0))]
>;
//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
[(set f32:$dst, (AMDGPUfract f32:$src0))]
>;
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
[(set f32:$dst, (int_AMDGPU_trunc f32:$src0))]
>;
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
[(set f32:$dst, (fceil f32:$src0))]
>;
defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
[(set f32:$dst, (frint f32:$src0))]
>;
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
[(set f32:$dst, (ffloor f32:$src0))]
>;
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
[(set f32:$dst, (fexp2 f32:$src0))]
>;
defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
[(set f32:$dst, (flog2 f32:$src0))]
>;
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
[(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
0x0000002d, "V_RSQ_LEGACY_F32",
[(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
>;
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
[(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
>;
defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
[(set f32:$dst, (fsqrt f32:$src0))]
>;
defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
[(set f64:$dst, (fsqrt f64:$src0))]
>;
defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
(ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
def V_INTERP_P2_F32 : VINTRP <
0x00000001,
(outs VReg_32:$dst),
(ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
[]> {
let Constraints = "$src0 = $dst";
let DisableEncoding = "$src0,$m0";
}
def V_INTERP_MOV_F32 : VINTRP <
0x00000002,
(outs VReg_32:$dst),
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
let isTerminator = 1 in {
def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
[(IL_retflag)]> {
let SIMM16 = 0;
let isBarrier = 1;
let hasCtrlDep = 1;
}
let isBranch = 1 in {
def S_BRANCH : SOPP <
0x00000002, (ins brtarget:$target), "S_BRANCH $target",
[(br bb:$target)]> {
let isBarrier = 1;
}
let DisableEncoding = "$scc" in {
def S_CBRANCH_SCC0 : SOPP <
0x00000004, (ins brtarget:$target, SCCReg:$scc),
"S_CBRANCH_SCC0 $target", []
>;
def S_CBRANCH_SCC1 : SOPP <
0x00000005, (ins brtarget:$target, SCCReg:$scc),
"S_CBRANCH_SCC1 $target",
[]
>;
} // End DisableEncoding = "$scc"
def S_CBRANCH_VCCZ : SOPP <
0x00000006, (ins brtarget:$target, VCCReg:$vcc),
"S_CBRANCH_VCCZ $target",
[]
>;
def S_CBRANCH_VCCNZ : SOPP <
0x00000007, (ins brtarget:$target, VCCReg:$vcc),
"S_CBRANCH_VCCNZ $target",
[]
>;
let DisableEncoding = "$exec" in {
def S_CBRANCH_EXECZ : SOPP <
0x00000008, (ins brtarget:$target, EXECReg:$exec),
"S_CBRANCH_EXECZ $target",
[]
>;
def S_CBRANCH_EXECNZ : SOPP <
0x00000009, (ins brtarget:$target, EXECReg:$exec),
"S_CBRANCH_EXECNZ $target",
[]
>;
} // End DisableEncoding = "$exec"
} // End isBranch = 1
} // End isTerminator = 1
let hasSideEffects = 1 in {
def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
[(int_AMDGPU_barrier_local)]
> {
let SIMM16 = 0;
let isBarrier = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
}
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
[]
>;
-} // End hasSideEffects
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+
+let Uses = [EXEC] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+ > {
+ let DisableEncoding = "$m0";
+ }
+} // End Uses = [EXEC]
+
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+} // End hasSideEffects
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
"V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
[]
>{
let DisableEncoding = "$vcc";
}
def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
"V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
[(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
>;
// f32 pattern for V_CNDMASK_B32_e64
def : Pat <
(f32 (select i1:$src2, f32:$src1, f32:$src0)),
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
def : Pat <
(i32 (trunc i64:$val)),
(EXTRACT_SUBREG $val, sub0)
>;
// Use two V_CNDMASK_B32_e64 instructions for f64
def : Pat <
(f64 (select i1:$src2, f64:$src1, f64:$src0)),
(INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub0),
(EXTRACT_SUBREG $src1, sub0),
$src2), sub0),
(V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub1),
(EXTRACT_SUBREG $src1, sub1),
$src2), sub1)
>;
defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
[(set f32:$dst, (fadd f32:$src0, f32:$src1))]
>;
defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
[(set f32:$dst, (fsub f32:$src0, f32:$src1))]
>;
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
} // End isCommutable = 1
defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
let isCommutable = 1 in {
defm V_MUL_LEGACY_F32 : VOP2_32 <
0x00000007, "V_MUL_LEGACY_F32",
[(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
>;
defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
[(set f32:$dst, (fmul f32:$src0, f32:$src1))]
>;
defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24",
[(set i32:$dst, (mul I24:$src0, I24:$src1))]
>;
//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24",
[(set i32:$dst, (mul U24:$src0, U24:$src1))]
>;
//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
[(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
>;
defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
[(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
>;
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with the vector ones when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
+ "V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
+ "V_SUBB_U32">;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
[(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
let neverHasSideEffects = 1 in {
def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
[(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))]
>;
def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
[(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))]
>;
} // End neverHasSideEffects
def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
defm : BFIPatterns <V_BFI_B32>;
def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
>;
def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
[(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
>;
//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
def : ROTRPattern <V_ALIGNBIT_B32>;
def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
let isCommutable = 1 in {
def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
} // isCommutable = 1
def : Pat <
(fadd f64:$src0, f64:$src1),
(V_ADD_F64 $src0, $src1, (i64 0))
>;
def : Pat <
(fmul f64:$src0, f64:$src1),
(V_MUL_F64 $src0, $src1, (i64 0))
>;
def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
let isCommutable = 1 in {
def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
} // isCommutable = 1
def : Pat <
(mul i32:$src0, i32:$src1),
(V_MUL_LO_I32 $src0, $src1, (i32 0))
>;
def : Pat <
(mulhu i32:$src0, i32:$src1),
(V_MUL_HI_U32 $src0, $src1, (i32 0))
>;
def : Pat <
(mulhs i32:$src0, i32:$src1),
(V_MUL_HI_I32 $src0, $src1, (i32 0))
>;
def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
[(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
>;
} // End isCommutable = 1
def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
[(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
[(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End isCommutable = 1
def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
[(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End Uses = [SCC]
} // End Defs = [SCC]
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
[]
>;
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
def : Pat <
(i1 (and i1:$src0, i1:$src1)),
(S_AND_B64 $src0, $src1)
>;
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
def : Pat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B64 $src0, $src1)
>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
[(set i1:$dst, (xor i1:$src0, i1:$src1))]
>;
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
} // End AddedComplexity = 1
def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
let isCodeGenOnly = 1, isPseudo = 1 in {
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
(ins i32imm:$src),
"LOAD_CONST $dst, $src",
[(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
>;
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
Uses = [EXEC], Defs = [EXEC] in {
let isBranch = 1, isTerminator = 1 in {
def SI_IF : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, brtarget:$target),
"SI_IF $dst, $vcc, $target",
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
>;
def SI_ELSE : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target),
"SI_ELSE $dst, $src, $target",
[(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
let Constraints = "$src = $dst";
}
def SI_LOOP : InstSI <
(outs),
(ins SReg_64:$saved, brtarget:$target),
"SI_LOOP $saved, $target",
[(int_SI_loop i64:$saved, bb:$target)]
>;
} // end isBranch = 1, isTerminator = 1
def SI_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src),
"SI_ELSE $dst, $src",
[(set i64:$dst, (int_SI_break i64:$src))]
>;
def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, SReg_64:$src),
"SI_IF_BREAK $dst, $vcc, $src",
[(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
>;
def SI_ELSE_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src0, SReg_64:$src1),
"SI_ELSE_BREAK $dst, $src0, $src1",
[(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
>;
def SI_END_CF : InstSI <
(outs),
(ins SReg_64:$saved),
"SI_END_CF $saved",
[(int_SI_end_cf i64:$saved)]
>;
def SI_KILL : InstSI <
(outs),
(ins VReg_32:$src),
"SI_KIL $src",
[(int_AMDGPU_kill f32:$src)]
>;
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>;
let UseNamedOperandTable = 1 in {
def SI_RegisterLoad : AMDGPUShaderInst <
(outs VReg_32:$dst, SReg_64:$temp),
(ins FRAMEri64:$addr, i32imm:$chan),
"", []
> {
let isRegisterLoad = 1;
let mayLoad = 1;
}
class SIRegStore<dag outs> : AMDGPUShaderInst <
outs,
(ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
"", []
> {
let isRegisterStore = 1;
let mayStore = 1;
}
let usesCustomInserter = 1 in {
def SI_RegisterStorePseudo : SIRegStore<(outs)>;
} // End usesCustomInserter = 1
def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
} // End UseNamedOperandTable = 1
def SI_INDIRECT_SRC : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
"SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
[]
>;
class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
(outs rc:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
"SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
[]
> {
let Constraints = "$src = $dst";
}
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
let usesCustomInserter = 1 in {
// This pseudo instruction takes a pointer as input and outputs a resource
// constant that can be used with the ADDR64 MUBUF instructions.
def SI_ADDR64_RSRC : InstSI <
(outs SReg_128:$srsrc),
(ins SReg_64:$ptr),
"", []
>;
def V_SUB_F64 : InstSI <
(outs VReg_64:$dst),
(ins VReg_64:$src0, VReg_64:$src1),
"V_SUB_F64 $dst, $src0, $src1",
[]
>;
} // end usesCustomInserter
} // end isCodeGenOnly = 1, isPseudo = 1
def : Pat<
(int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
(V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
>;
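// i.e. cndlt(x, a, b) == (x < 0.0 ? a : b): V_CMP_GT computes (0 > x), and
// V_CNDMASK picks $src1 on lanes where that condition holds.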
def : Pat <
(int_AMDGPU_kilp),
(SI_KILL (V_MOV_B32_e32 0xbf800000))
>;
/* int_SI_vs_load_input */
def : Pat<
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
f32:$src0, f32:$src1, f32:$src2, f32:$src3),
(EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
$src0, $src1, $src2, $src3)
>;
def : Pat <
(f64 (fsub f64:$src0, f64:$src1)),
(V_SUB_F64 $src0, $src1)
>;
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
/* SIsample for simple 1D texture lookup */
def : Pat <
(SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
(IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
/* SIsample* for texture lookups consuming more address parameters */
multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
def : SamplePattern <SIsample, sample, addr_type>;
def : SampleRectPattern <SIsample, sample, addr_type>;
def : SampleArrayPattern <SIsample, sample, addr_type>;
def : SampleShadowPattern <SIsample, sample_c, addr_type>;
def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
def : SamplePattern <SIsamplel, sample_l, addr_type>;
def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
def : SamplePattern <SIsampleb, sample_b, addr_type>;
def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
def : SamplePattern <SIsampled, sample_d, addr_type>;
def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
}
defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
v2i32>;
defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
v4i32>;
defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
v8i32>;
defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
v16i32>;
/* int_SI_imageload for texture fetches consuming varying address parameters */
class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
(name addr_type:$addr, v32i8:$rsrc, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
>;
class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
(name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
(name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
>;
class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
(name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
}
multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
}
defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
/* Image resource information */
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
(IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
(IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
(IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v2i32_#Index : Insert_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v2f32_#Index : Extract_Element <
f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v2f32_#Index : Insert_Element <
f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-3 in {
def Extract_Element_v4i32_#Index : Extract_Element <
i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v4i32_#Index : Insert_Element <
i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v4f32_#Index : Extract_Element <
f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v4f32_#Index : Insert_Element <
f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-7 in {
def Extract_Element_v8i32_#Index : Extract_Element <
i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v8i32_#Index : Insert_Element <
i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v8f32_#Index : Extract_Element <
f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v8f32_#Index : Insert_Element <
f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-15 in {
def Extract_Element_v16i32_#Index : Extract_Element <
i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v16i32_#Index : Insert_Element <
i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v16f32_#Index : Extract_Element <
f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v16f32_#Index : Insert_Element <
f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <f32, i32, VReg_32>;
def : BitConvert <i64, f64, VReg_64>;
def : BitConvert <f64, i64, VReg_64>;
def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
def : BitConvert <v4i32, i128, VReg_128>;
def : BitConvert <i128, v4i32, VReg_128>;
def : BitConvert <v8i32, v32i8, SReg_256>;
def : BitConvert <v32i8, v8i32, SReg_256>;
def : BitConvert <v8i32, v32i8, VReg_256>;
def : BitConvert <v32i8, v8i32, VReg_256>;
/********** =================== **********/
/********** Src & Dst modifiers **********/
/********** =================== **********/
def : Pat <
(int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
(V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>;
+/********** ================================ **********/
+/********** Floating point absolute/negative **********/
+/********** ================================ **********/
+
+// Manipulate the sign bit directly, as e.g. using the source negation modifier
+// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
+// breaking the piglit *s-floatBitsToInt-neg* tests
+
+// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
+// removing these patterns
+
def : Pat <
+ (fneg (fabs f32:$src)),
+ (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+>;
+
+def : Pat <
(fabs f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
>;
def : Pat <
(fneg f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
>;
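// Illustrative bit patterns for the three ops above (IEEE-754 binary32,
// sign in bit 31):
//   fneg(fabs(-1.5)): 0xbfc00000 -> OR  0x80000000 -> 0xbfc00000 (-1.5)
//   fabs(-1.5):       0xbfc00000 -> AND 0x7fffffff -> 0x3fc00000 (+1.5)
//   fneg(+0.0):       0x00000000 -> XOR 0x80000000 -> 0x80000000 (-0.0)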
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
def : Pat <
(SGPRImm<(i32 imm)>:$imm),
(S_MOV_B32 imm:$imm)
>;
def : Pat <
(SGPRImm<(f32 fpimm)>:$imm),
(S_MOV_B32 fpimm:$imm)
>;
def : Pat <
(i32 imm:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
def : Pat <
(f32 fpimm:$imm),
(V_MOV_B32_e32 fpimm:$imm)
>;
def : Pat <
(i1 imm:$imm),
(S_MOV_B64 imm:$imm)
>;
def : Pat <
(i64 InlineImm<i64>:$imm),
(S_MOV_B64 InlineImm<i64>:$imm)
>;
// i64 immediates aren't supported in hardware; split them into two 32-bit values
def : Pat <
(i64 imm:$imm),
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
(S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
>;
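// E.g. the pattern above materializes 0x1122334455667788 as
//   S_MOV_B32 sub0, 0x55667788   (LO32)
//   S_MOV_B32 sub1, 0x11223344   (HI32)
// with INSERT_SUBREG joining the halves into one 64-bit SGPR pair.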
def : Pat <
(f64 fpimm:$imm),
(INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0),
(V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1)
>;
/********** ====================== **********/
/********** Interpolation Patterns **********/
/********** ====================== **********/
def : Pat <
(int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
(V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
>;
def : Pat <
(int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
imm:$attr_chan, imm:$attr, i32:$params),
(EXTRACT_SUBREG $ij, sub1),
imm:$attr_chan, imm:$attr, $params)
>;
/********** ================== **********/
/********** Intrinsic Patterns **********/
/********** ================== **********/
/* llvm.AMDGPU.pow */
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
(int_AMDGPU_div f32:$src0, f32:$src1),
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;
def : Pat<
(fdiv f32:$src0, f32:$src1),
(V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
>;
def : Pat<
(fdiv f64:$src0, f64:$src1),
(V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
>;
def : Pat <
(fcos f32:$src0),
(V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>;
def : Pat <
(fsin f32:$src0),
(V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>;
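// V_SIN_F32/V_COS_F32 take their input in revolutions rather than radians,
// hence the pre-multiplication by TWO_PI_INV (1 / (2 * pi)) above.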
def : Pat <
(int_AMDGPU_cube v4f32:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
(V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
(EXTRACT_SUBREG $src, sub1),
(EXTRACT_SUBREG $src, sub2)),
sub0),
(V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0),
(EXTRACT_SUBREG $src, sub1),
(EXTRACT_SUBREG $src, sub2)),
sub1),
(V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0),
(EXTRACT_SUBREG $src, sub1),
(EXTRACT_SUBREG $src, sub2)),
sub2),
(V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0),
(EXTRACT_SUBREG $src, sub1),
(EXTRACT_SUBREG $src, sub2)),
sub3)
>;
def : Pat <
(i32 (sext i1:$src0)),
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
+def : Pat <
+ (i32 (zext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+>;
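+// E.g. for $src0 == true these select -1 (sext) and 1 (zext) per lane;
+// for $src0 == false both select 0.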
+
// 1. Offset as an 8-bit DWORD immediate
def : Pat <
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
>;
// 2. Offset loaded in a 32-bit SGPR
def : Pat <
(SIload_constant i128:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
// 3. Offset in a 32-bit VGPR
def : Pat <
(SIload_constant i128:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
+ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
// The multiplication scales the f32 reciprocal from [0,1] up to the unsigned 32-bit integer range
def : Pat <
(AMDGPUurecip i32:$src0),
(V_CVT_U32_F32_e32
(V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;
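// Illustrative: FP_UINT_MAX_PLUS_1 is 2^32 encoded as an f32, so the result
// approximates 2^32 / $src0 (e.g. $src0 == 4 gives 0x40000000).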
def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32_e32 0xffffffff,
(V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0))
>;
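// With an all-ones mask, V_MBCNT_LO counts the lanes below the current one
// among lanes 0-31 and V_MBCNT_HI adds those among lanes 32-63, so the result
// is the thread's index within the 64-lane wavefront (e.g. lane 33 -> 33).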
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
def : Pat <
(f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
(V_MAD_F32 $src0, $src1, $src2)
>;
/********** ======================= **********/
/********** Load/Store Patterns **********/
/********** ======================= **********/
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag i32:$src0),
(vt (inst 0, $src0, $src0, $src0, 0, 0))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
def : DSReadPat <DS_READ_B32, i32, local_load>;
def : Pat <
(local_load i32:$src0),
(i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag i32:$src1, i32:$src0),
(inst 0, $src0, $src1, $src1, 0, 0)
>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
(DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
(DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
/********** ================== **********/
/********** SMRD Patterns **********/
/********** ================== **********/
multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
// 1. Offset as an 8-bit DWORD immediate
def : Pat <
(constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
(vt (Instr_IMM $sbase, IMM8bitDWORD:$offset))
>;
// 2. Offset loaded in a 32-bit SGPR
def : Pat <
(constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
(vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset)))
>;
// 3. No offset at all
def : Pat <
(constant_load i64:$sbase),
(vt (Instr_IMM $sbase, 0))
>;
}
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
//===----------------------------------------------------------------------===//
// MUBUF Patterns
//===----------------------------------------------------------------------===//
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag global_ld, PatFrag constant_ld> {
def : Pat <
(vt (global_ld (add i64:$ptr, (i64 IMM12bit:$offset)))),
(Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
>;
def : Pat <
(vt (global_ld i64:$ptr)),
(Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
>;
def : Pat <
(vt (global_ld (add i64:$ptr, i64:$offset))),
(Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
>;
def : Pat <
(vt (constant_ld (add i64:$ptr, i64:$offset))),
(Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
>;
}
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32,
sextloadi8_global, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
az_extloadi8_global, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32,
sextloadi16_global, sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32,
az_extloadi16_global, az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
global_load, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
global_load, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
az_extloadi32_global, az_extloadi32_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
global_load, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
global_load, constant_load>;
multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> {
def : Pat <
(st vt:$value, i64:$ptr),
(Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
>;
def : Pat <
(st vt:$value, (add i64:$ptr, i64:$offset)),
(Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0)
>;
}
defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
+ MUBUF bothen> {
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm, 1, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ imm, 1, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
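+// E.g. offen=1, idxen=1 selects the *_BOTHEN forms, whose $vaddr is a
+// v2i32 holding the (index, offset) pair; offen=0, idxen=0 selects the
+// *_OFFSET forms with the 16-bit immediate offset instead.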
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
// TBUFFER_STORE_FORMAT_*, addr64=0
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
(SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
i32:$soffset, imm:$inst_offset, imm:$dfmt,
imm:$nfmt, imm:$offen, imm:$idxen,
imm:$glc, imm:$slc, imm:$tfe),
(opcode
$vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
(as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
(as_i1imm $slc), (as_i1imm $tfe), $soffset)
>;
def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
/********** ====================== **********/
/********** Indirect addressing **********/
/********** ====================== **********/
multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
// 1. Extract with offset
def : Pat<
(vector_extract vt:$vec, (add i32:$idx, imm:$off)),
(f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
>;
// 2. Extract without offset
def : Pat<
(vector_extract vt:$vec, i32:$idx),
(f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, f32:$val, (add i32:$idx, imm:$off)),
(IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, f32:$val, i32:$idx),
(IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
>;
}
defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
/********** =============== **********/
/********** Conditions **********/
/********** =============== **********/
def : Pat<
(i1 (setcc f32:$src0, f32:$src1, SETO)),
(V_CMP_O_F32_e64 $src0, $src1)
>;
def : Pat<
(i1 (setcc f32:$src0, f32:$src1, SETUO)),
(V_CMP_U_F32_e64 $src0, $src1)
>;
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
def : Pat <
(i64 (trunc i128:$x)),
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(i32 (EXTRACT_SUBREG $x, sub0)), sub0),
(i32 (EXTRACT_SUBREG $x, sub1)), sub1)
>;
def : Pat <
(i32 (trunc i64:$a)),
(EXTRACT_SUBREG $a, sub0)
+>;
+
+def : Pat <
+ (i1 (trunc i32:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
>;
// V_ADD_I32_e32/S_ADD_I32 produce a carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
(i32 (addc i32:$src0, i32:$src1)),
(S_ADD_I32 $src0, $src1)
>;
def : Pat <
(or i64:$a, i64:$b),
(INSERT_SUBREG
(INSERT_SUBREG (IMPLICIT_DEF),
(V_OR_B32_e32 (EXTRACT_SUBREG $a, sub0), (EXTRACT_SUBREG $b, sub0)), sub0),
(V_OR_B32_e32 (EXTRACT_SUBREG $a, sub1), (EXTRACT_SUBREG $b, sub1)), sub1)
>;
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
} // End isSI predicate
Index: head/contrib/llvm/lib/Target/R600/SIIntrinsics.td
===================================================================
--- head/contrib/llvm/lib/Target/R600/SIIntrinsics.td (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/SIIntrinsics.td (revision 265925)
@@ -1,66 +1,82 @@
//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
// Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
def int_SI_tbuffer_store : Intrinsic <
[],
[llvm_anyint_ty, // rsrc(SGPR)
llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
llvm_i32_ty, // vaddr(VGPR)
llvm_i32_ty, // soffset(SGPR)
llvm_i32_ty, // inst_offset(imm)
llvm_i32_ty, // dfmt(imm)
llvm_i32_ty, // nfmt(imm)
llvm_i32_ty, // offen(imm)
llvm_i32_ty, // idxen(imm)
llvm_i32_ty, // glc(imm)
llvm_i32_ty, // slc(imm)
llvm_i32_ty], // tfe(imm)
[]>;
+ // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
+ def int_SI_buffer_load_dword : Intrinsic <
+ [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ [IntrReadArgMem]>;
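+  // A rough, hypothetical IR-level sketch (assumed name mangling from the
+  // overloaded types, here i32 data / i128 rsrc / i32 vaddr):
+  //   %d = call i32 @llvm.SI.buffer.load.dword.i32.i128.i32(i128 %rsrc,
+  //          i32 %vaddr, i32 %soff, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)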
+
+ def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
def int_SI_sampled : Sample;
def int_SI_samplel : Sample;
def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
/* Interpolation Intrinsics */
def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
/* Control flow Intrinsics */
def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
}
Index: head/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
===================================================================
--- head/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp (revision 265925)
@@ -1,527 +1,545 @@
//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass lowers the pseudo control flow instructions to real
/// machine instructions.
///
/// All control flow is handled using predicated instructions and
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
/// by writing to the 64-bit EXEC register (each bit corresponds to a
/// single vector ALU). Typically, for predicates, a vector ALU will write
/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one bit for
/// each vector ALU) and then the scalar ALU will AND the VCC register with the
/// EXEC to update the predicates.
///
/// For example:
/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
/// %SGPR0 = SI_IF %VCC
/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
/// %SGPR0 = SI_ELSE %SGPR0
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
/// SI_END_CF %SGPR0
///
/// becomes:
///
/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label0 // This instruction is an optional
/// // optimization which allows us to
/// // branch if all the bits of
/// // EXEC are zero.
/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
///
/// label0:
/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label1 // Use our branch optimization
/// // instruction again.
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
/// label1:
/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class SILowerControlFlowPass : public MachineFunctionPass {
private:
static const unsigned SkipThreshold = 12;
static char ID;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
void Skip(MachineInstr &From, MachineOperand &To);
void SkipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
void Else(MachineInstr &MI);
void Break(MachineInstr &MI);
void IfBreak(MachineInstr &MI);
void ElseBreak(MachineInstr &MI);
void Loop(MachineInstr &MI);
void EndCf(MachineInstr &MI);
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
public:
SILowerControlFlowPass(TargetMachine &tm) :
MachineFunctionPass(ID), TRI(0), TII(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI Lower control flow instructions";
}
};
} // End anonymous namespace
char SILowerControlFlowPass::ID = 0;
FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
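+// Opcodes touching the local data share (LDS): the pass initializes M0 for
+// these and, in pixel shaders, enables whole quad mode (see
+// runOnMachineFunction below).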
+static bool isDS(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DS_ADD_U32_RTN:
+ case AMDGPU::DS_SUB_U32_RTN:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B8:
+ case AMDGPU::DS_WRITE_B16:
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_I8:
+ case AMDGPU::DS_READ_U8:
+ case AMDGPU::DS_READ_I16:
+ case AMDGPU::DS_READ_U16:
+ return true;
+ }
+}
+
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
unsigned NumInstr = 0;
for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
MBB = *MBB->succ_begin()) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
NumInstr < SkipThreshold && I != E; ++I) {
if (I->isBundle() || !I->isBundled())
if (++NumInstr >= SkipThreshold)
return true;
}
}
return false;
}
void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
return;
DebugLoc DL = From.getDebugLoc();
BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.addOperand(To)
.addReg(AMDGPU::EXEC);
}
void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+ ShaderType::PIXEL ||
+ !shouldSkip(&MBB, &MBB.getParent()->back()))
return;
MachineBasicBlock::iterator Insert = &MI;
++Insert;
// If the exec mask is non-zero, skip the next two instructions
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addImm(3)
.addReg(AMDGPU::EXEC);
// Exec mask is zero: Export to NULL target...
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
.addImm(0)
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
.addImm(0)
.addImm(1)
.addImm(1)
.addReg(AMDGPU::VGPR0)
.addReg(AMDGPU::VGPR0)
.addReg(AMDGPU::VGPR0)
.addReg(AMDGPU::VGPR0);
// ... and terminate wavefront
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
}
void SILowerControlFlowPass::If(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Reg = MI.getOperand(0).getReg();
unsigned Vcc = MI.getOperand(1).getReg();
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
.addReg(Vcc);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
.addReg(AMDGPU::EXEC)
.addReg(Reg);
Skip(MI, MI.getOperand(2));
MI.eraseFromParent();
}
void SILowerControlFlowPass::Else(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
BuildMI(MBB, MBB.getFirstNonPHI(), DL,
TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
.addReg(Src); // Saved EXEC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(Dst);
Skip(MI, MI.getOperand(2));
MI.eraseFromParent();
}
void SILowerControlFlowPass::Break(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
.addReg(AMDGPU::EXEC)
.addReg(Src);
MI.eraseFromParent();
}
void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vcc = MI.getOperand(1).getReg();
unsigned Src = MI.getOperand(2).getReg();
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
.addReg(Vcc)
.addReg(Src);
MI.eraseFromParent();
}
void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Saved = MI.getOperand(1).getReg();
unsigned Src = MI.getOperand(2).getReg();
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
.addReg(Saved)
.addReg(Src);
MI.eraseFromParent();
}
void SILowerControlFlowPass::Loop(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Src = MI.getOperand(0).getReg();
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(Src);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addOperand(MI.getOperand(1))
.addReg(AMDGPU::EXEC);
MI.eraseFromParent();
}
void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Reg = MI.getOperand(0).getReg();
BuildMI(MBB, MBB.getFirstNonPHI(), DL,
TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(Reg);
MI.eraseFromParent();
}
void SILowerControlFlowPass::Branch(MachineInstr &MI) {
MachineBasicBlock *Next = MI.getParent()->getNextNode();
MachineBasicBlock *Target = MI.getOperand(0).getMBB();
if (Target == Next)
MI.eraseFromParent();
else
assert(0);
}
void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- // Kill is only allowed in pixel shaders
+ // Kill is only allowed in pixel / geometry shaders
assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::PIXEL);
+ ShaderType::PIXEL ||
+ MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::GEOMETRY);
// Clear this pixel from the exec mask if the operand is negative
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
.addImm(0)
.addOperand(MI.getOperand(0));
MI.eraseFromParent();
}
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock::iterator I = MI;
unsigned Save = MI.getOperand(1).getReg();
unsigned Idx = MI.getOperand(3).getReg();
if (AMDGPU::SReg_32RegClass.contains(Idx)) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(Idx);
MBB.insert(I, MovRel);
MI.eraseFromParent();
return;
}
assert(AMDGPU::SReg_64RegClass.contains(Save));
assert(AMDGPU::VReg_32RegClass.contains(Idx));
// Save the EXEC mask
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
.addReg(AMDGPU::EXEC);
// Read the next variant into VCC (lower 32 bits) <- also loop target
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
.addReg(Idx);
// Move index from VCC into M0
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(AMDGPU::VCC);
// Compare the just read M0 value to all possible Idx values
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
.addReg(AMDGPU::M0)
.addReg(Idx);
// Update EXEC, save the original EXEC value to VCC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
.addReg(AMDGPU::VCC);
// Do the actual move
MBB.insert(I, MovRel);
// Update EXEC, switch all done bits to 0 and all todo bits to 1
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(AMDGPU::VCC);
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addImm(-7)
.addReg(AMDGPU::EXEC);
// Restore EXEC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
.addReg(Save);
MI.eraseFromParent();
}
void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
if (!SubReg)
SubReg = Vec;
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(SubReg + Off)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
LoadM0(MI, MovRel);
}
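// Lower SI_INDIRECT_DST_*: write a value into one element of a register
// tuple with V_MOVRELD_B32, again indexed through M0.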
void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
if (!SubReg)
SubReg = Dst;
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
.addReg(SubReg + Off, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
LoadM0(MI, MovRel);
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool NeedM0 = false;
bool NeedWQM = false;
unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
I != MBB.end(); I = Next) {
Next = llvm::next(I);
MachineInstr &MI = *I;
+ if (isDS(MI.getOpcode())) {
+ NeedM0 = true;
+ NeedWQM = true;
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
++Depth;
If(MI);
break;
case AMDGPU::SI_ELSE:
Else(MI);
break;
case AMDGPU::SI_BREAK:
Break(MI);
break;
case AMDGPU::SI_IF_BREAK:
IfBreak(MI);
break;
case AMDGPU::SI_ELSE_BREAK:
ElseBreak(MI);
break;
case AMDGPU::SI_LOOP:
++Depth;
Loop(MI);
break;
case AMDGPU::SI_END_CF:
if (--Depth == 0 && HaveKill) {
SkipIfDead(MI);
HaveKill = false;
}
EndCf(MI);
break;
case AMDGPU::SI_KILL:
if (Depth == 0)
SkipIfDead(MI);
else
HaveKill = true;
Kill(MI);
break;
case AMDGPU::S_BRANCH:
Branch(MI);
break;
case AMDGPU::SI_INDIRECT_SRC:
IndirectSrc(MI);
break;
case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
case AMDGPU::SI_INDIRECT_DST_V16:
IndirectDst(MI);
break;
- case AMDGPU::DS_READ_B32:
- NeedWQM = true;
- // Fall through
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_ADD_U32_RTN:
- NeedM0 = true;
- break;
-
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
NeedWQM = true;
break;
}
}
}
if (NeedM0) {
MachineBasicBlock &MBB = MF.front();
// Initialize M0 to a value that won't cause LDS access to be discarded
// due to offset clamping
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
AMDGPU::M0).addImm(0xffffffff);
}
- if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
+ if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
}
return true;
}
Index: head/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===================================================================
--- head/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp (revision 265925)
@@ -1,2684 +1,2705 @@
//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
struct X86Operand;
static const char OpPrecedence[] = {
0, // IC_PLUS
0, // IC_MINUS
1, // IC_MULTIPLY
1, // IC_DIVIDE
2, // IC_RPAREN
3, // IC_LPAREN
0, // IC_IMM
0 // IC_REGISTER
};
class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
ParseInstructionInfo *InstInfo;
private:
enum InfixCalculatorTok {
IC_PLUS = 0,
IC_MINUS,
IC_MULTIPLY,
IC_DIVIDE,
IC_RPAREN,
IC_LPAREN,
IC_IMM,
IC_REGISTER
};
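// A small shunting-yard style evaluator: pushOperator() shifts operators from
// the infix stack to the postfix stack according to precedence, and execute()
// then folds the postfix form into a single 64-bit immediate.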
class InfixCalculator {
typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
public:
int64_t popOperand() {
assert (!PostfixStack.empty() && "Popped an empty stack!");
ICToken Op = PostfixStack.pop_back_val();
assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
&& "Expected and immediate or register!");
return Op.second;
}
void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
assert ((Op == IC_IMM || Op == IC_REGISTER) &&
"Unexpected operand!");
PostfixStack.push_back(std::make_pair(Op, Val));
}
void popOperator() { InfixOperatorStack.pop_back(); }
void pushOperator(InfixCalculatorTok Op) {
// Push the new operator if the stack is empty.
if (InfixOperatorStack.empty()) {
InfixOperatorStack.push_back(Op);
return;
}
// Push the new operator if it has a higher precedence than the operator
// on the top of the stack or the operator on the top of the stack is a
// left parenthesis.
unsigned Idx = InfixOperatorStack.size() - 1;
InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
InfixOperatorStack.push_back(Op);
return;
}
// The operator on the top of the stack has higher precedence than the
// new operator.
unsigned ParenCount = 0;
while (1) {
// Nothing to process.
if (InfixOperatorStack.empty())
break;
Idx = InfixOperatorStack.size() - 1;
StackOp = InfixOperatorStack[Idx];
if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
break;
// If the parenthesis count is zero and we see a left parenthesis,
// then stop processing.
if (!ParenCount && StackOp == IC_LPAREN)
break;
if (StackOp == IC_RPAREN) {
++ParenCount;
InfixOperatorStack.pop_back();
} else if (StackOp == IC_LPAREN) {
--ParenCount;
InfixOperatorStack.pop_back();
} else {
InfixOperatorStack.pop_back();
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
}
// Push the new operator.
InfixOperatorStack.push_back(Op);
}
int64_t execute() {
// Push any remaining operators onto the postfix stack.
while (!InfixOperatorStack.empty()) {
InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
if (PostfixStack.empty())
return 0;
SmallVector<ICToken, 16> OperandStack;
for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
ICToken Op = PostfixStack[i];
if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
OperandStack.push_back(Op);
} else {
assert (OperandStack.size() > 1 && "Too few operands.");
int64_t Val;
ICToken Op2 = OperandStack.pop_back_val();
ICToken Op1 = OperandStack.pop_back_val();
switch (Op.first) {
default:
report_fatal_error("Unexpected operator!");
break;
case IC_PLUS:
Val = Op1.second + Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_MINUS:
Val = Op1.second - Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_MULTIPLY:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Multiply operation with an immediate and a register!");
Val = Op1.second * Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_DIVIDE:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Divide operation with an immediate and a register!");
assert (Op2.second != 0 && "Division by zero!");
Val = Op1.second / Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
}
}
}
assert (OperandStack.size() == 1 && "Expected a single result.");
return OperandStack.pop_back_val().second;
}
};
enum IntelExprState {
IES_PLUS,
IES_MINUS,
IES_MULTIPLY,
IES_DIVIDE,
IES_LBRAC,
IES_RBRAC,
IES_LPAREN,
IES_RPAREN,
IES_REGISTER,
IES_INTEGER,
IES_IDENTIFIER,
IES_ERROR
};
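// Token-driven state machine for Intel-syntax address expressions. It tracks
// the base register, index register and scale itself, and feeds every other
// term into an InfixCalculator so getImm() can produce the final constant
// displacement.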
class IntelExprStateMachine {
IntelExprState State, PrevState;
unsigned BaseReg, IndexReg, TmpReg, Scale;
int64_t Imm;
const MCExpr *Sym;
StringRef SymName;
bool StopOnLBrac, AddImmPrefix;
InfixCalculator IC;
InlineAsmIdentifierInfo Info;
public:
IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
AddImmPrefix(addimmprefix) { Info.clear(); }
unsigned getBaseReg() { return BaseReg; }
unsigned getIndexReg() { return IndexReg; }
unsigned getScale() { return Scale; }
const MCExpr *getSym() { return Sym; }
StringRef getSymName() { return SymName; }
int64_t getImm() { return Imm + IC.execute(); }
bool isValidEndState() {
return State == IES_RBRAC || State == IES_INTEGER;
}
bool getStopOnLBrac() { return StopOnLBrac; }
bool getAddImmPrefix() { return AddImmPrefix; }
bool hadError() { return State == IES_ERROR; }
InlineAsmIdentifierInfo &getIdentifierInfo() {
return Info;
}
void onPlus() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_PLUS;
IC.pushOperator(IC_PLUS);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// a scale of 1.
if (!BaseReg) {
BaseReg = TmpReg;
} else {
assert (!IndexReg && "BaseReg/IndexReg already set!");
IndexReg = TmpReg;
Scale = 1;
}
}
break;
}
PrevState = CurrState;
}
void onMinus() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_LPAREN:
case IES_RPAREN:
case IES_LBRAC:
case IES_RBRAC:
case IES_INTEGER:
case IES_REGISTER:
State = IES_MINUS;
// Only push the minus operator if it is not a unary operator.
if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
CurrState == IES_LPAREN || CurrState == IES_LBRAC))
IC.pushOperator(IC_MINUS);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// a scale of 1.
if (!BaseReg) {
BaseReg = TmpReg;
} else {
assert (!IndexReg && "BaseReg/IndexReg already set!");
IndexReg = TmpReg;
Scale = 1;
}
}
break;
}
PrevState = CurrState;
}
void onRegister(unsigned Reg) {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_LPAREN:
State = IES_REGISTER;
TmpReg = Reg;
IC.pushOperand(IC_REGISTER);
break;
case IES_MULTIPLY:
// Index Register - Scale * Register
if (PrevState == IES_INTEGER) {
assert (!IndexReg && "IndexReg already set!");
State = IES_REGISTER;
IndexReg = Reg;
// Get the scale and replace the 'Scale * Register' with '0'.
Scale = IC.popOperand();
IC.pushOperand(IC_IMM);
IC.popOperator();
} else {
State = IES_ERROR;
}
break;
}
PrevState = CurrState;
}
void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MINUS:
State = IES_INTEGER;
Sym = SymRef;
SymName = SymRefName;
IC.pushOperand(IC_IMM);
break;
}
}
void onInteger(int64_t TmpInt) {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MINUS:
case IES_DIVIDE:
case IES_MULTIPLY:
case IES_LPAREN:
State = IES_INTEGER;
if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
// Index Register - Register * Scale
assert (!IndexReg && "IndexReg already set!");
IndexReg = TmpReg;
Scale = TmpInt;
// Get the scale and replace the 'Register * Scale' with '0'.
IC.popOperator();
} else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
CurrState == IES_MINUS) {
// Unary minus. No need to pop the minus operator because it was never
// pushed.
IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
} else {
IC.pushOperand(IC_IMM, TmpInt);
}
break;
}
PrevState = CurrState;
}
void onStar() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_REGISTER:
case IES_RPAREN:
State = IES_MULTIPLY;
IC.pushOperator(IC_MULTIPLY);
break;
}
}
void onDivide() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
State = IES_DIVIDE;
IC.pushOperator(IC_DIVIDE);
break;
}
}
void onLBrac() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_RBRAC:
State = IES_PLUS;
IC.pushOperator(IC_PLUS);
break;
}
}
void onRBrac() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_REGISTER:
case IES_RPAREN:
State = IES_RBRAC;
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// a scale of 1.
if (!BaseReg) {
BaseReg = TmpReg;
} else {
assert (!IndexReg && "BaseReg/IndexReg already set!");
IndexReg = TmpReg;
Scale = 1;
}
}
break;
}
PrevState = CurrState;
}
void onLParen() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MINUS:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_LPAREN:
// FIXME: We don't handle this type of unary minus, yet.
if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
CurrState == IES_MINUS) {
State = IES_ERROR;
break;
}
State = IES_LPAREN;
IC.pushOperator(IC_LPAREN);
break;
}
PrevState = CurrState;
}
void onRParen() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_REGISTER:
case IES_RPAREN:
State = IES_RPAREN;
IC.pushOperator(IC_RPAREN);
break;
}
}
};
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None,
bool MatchingInlineAsm = false) {
if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
Error(Loc, Msg);
return 0;
}
X86Operand *ParseOperand();
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator();
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
X86Operand *ParseIntelOperator(unsigned OpKind);
X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
unsigned Size);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
int64_t ImmDisp, unsigned Size);
bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End,
unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
/// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
/// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
/// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
void SwitchMode() {
unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
setAvailableFeatures(FB);
}
bool isParsingIntelSyntax() {
return getParser().getAssemblerDialect();
}
/// @name Auto-generated Matcher Functions
/// {
#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"
/// }
public:
X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII)
: MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
} // end anonymous namespace
/// @name Auto-generated Match Functions
/// {
static unsigned MatchRegisterName(StringRef Name);
/// }
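// Immediate range predicates used by the X86Operand classes below: each one
// accepts exactly the 64-bit values that are representable by sign- or
// zero-extending an immediate of the narrower width in its name.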
static bool isImmSExti16i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
static bool isImmSExti32i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
static bool isImmZExtu32u8Value(uint64_t Value) {
return (Value <= 0x00000000000000FFULL);
}
static bool isImmSExti64i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
static bool isImmSExti64i32Value(uint64_t Value) {
return (( Value <= 0x000000007FFFFFFFULL)||
(0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
namespace {
/// X86Operand - Instances of this class represent one parsed operand of an
/// X86 machine instruction.
struct X86Operand : public MCParsedAsmOperand {
enum KindTy {
Token,
Register,
Immediate,
Memory
} Kind;
SMLoc StartLoc, EndLoc;
SMLoc OffsetOfLoc;
StringRef SymName;
void *OpDecl;
bool AddressOf;
struct TokOp {
const char *Data;
unsigned Length;
};
struct RegOp {
unsigned RegNo;
};
struct ImmOp {
const MCExpr *Val;
};
struct MemOp {
unsigned SegReg;
const MCExpr *Disp;
unsigned BaseReg;
unsigned IndexReg;
unsigned Scale;
unsigned Size;
};
union {
struct TokOp Tok;
struct RegOp Reg;
struct ImmOp Imm;
struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
: Kind(K), StartLoc(Start), EndLoc(End) {}
StringRef getSymName() { return SymName; }
void *getOpDecl() { return OpDecl; }
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
/// getLocRange - Get the range between the first and last token of this
/// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
/// getOffsetOfLoc - Get the location of the offset operator.
SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
virtual void print(raw_ostream &OS) const {}
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
void setTokenValue(StringRef Value) {
assert(Kind == Token && "Invalid access!");
Tok.Data = Value.data();
Tok.Length = Value.size();
}
unsigned getReg() const {
assert(Kind == Register && "Invalid access!");
return Reg.RegNo;
}
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid access!");
return Imm.Val;
}
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
}
unsigned getMemSegReg() const {
assert(Kind == Memory && "Invalid access!");
return Mem.SegReg;
}
unsigned getMemBaseReg() const {
assert(Kind == Memory && "Invalid access!");
return Mem.BaseReg;
}
unsigned getMemIndexReg() const {
assert(Kind == Memory && "Invalid access!");
return Mem.IndexReg;
}
unsigned getMemScale() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Scale;
}
bool isToken() const {return Kind == Token; }
bool isImm() const { return Kind == Immediate; }
bool isImmSExti16i8() const {
if (!isImm())
return false;
// If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE)
return true;
// Otherwise, check the value is in a range that makes sense for this
// extension.
return isImmSExti16i8Value(CE->getValue());
}
bool isImmSExti32i8() const {
if (!isImm())
return false;
// If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE)
return true;
// Otherwise, check the value is in a range that makes sense for this
// extension.
return isImmSExti32i8Value(CE->getValue());
}
bool isImmZExtu32u8() const {
if (!isImm())
return false;
// If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE)
return true;
// Otherwise, check the value is in a range that makes sense for this
// extension.
return isImmZExtu32u8Value(CE->getValue());
}
bool isImmSExti64i8() const {
if (!isImm())
return false;
// If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE)
return true;
// Otherwise, check the value is in a range that makes sense for this
// extension.
return isImmSExti64i8Value(CE->getValue());
}
bool isImmSExti64i32() const {
if (!isImm())
return false;
// If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE)
return true;
// Otherwise, check the value is in a range that makes sense for this
// extension.
return isImmSExti64i32Value(CE->getValue());
}
bool isOffsetOf() const {
return OffsetOfLoc.getPointer();
}
bool needAddressOf() const {
return AddressOf;
}
bool isMem() const { return Kind == Memory; }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}
bool isMem16() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 16);
}
bool isMem32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32);
}
bool isMem64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64);
}
bool isMem80() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 80);
}
bool isMem128() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 128);
}
bool isMem256() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 256);
}
bool isMem512() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 512);
}
bool isMemVX32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
bool isMemVY32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
bool isMemVX64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
bool isMemVY64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
bool isMemVZ32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
}
bool isMemVZ64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
}
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
}
bool isMemOffs8() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
}
bool isMemOffs16() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
}
bool isMemOffs32() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
}
bool isMemOffs64() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
}
bool isReg() const { return Kind == Register; }
bool isGR32orGR64() const {
return Kind == Register &&
(X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
}
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
else
Inst.addOperand(MCOperand::CreateExpr(Expr));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
static unsigned getGR32FromGR64(unsigned RegNo) {
switch (RegNo) {
default: llvm_unreachable("Unexpected register");
case X86::RAX: return X86::EAX;
case X86::RCX: return X86::ECX;
case X86::RDX: return X86::EDX;
case X86::RBX: return X86::EBX;
case X86::RBP: return X86::EBP;
case X86::RSP: return X86::ESP;
case X86::RSI: return X86::ESI;
case X86::RDI: return X86::EDI;
case X86::R8: return X86::R8D;
case X86::R9: return X86::R9D;
case X86::R10: return X86::R10D;
case X86::R11: return X86::R11D;
case X86::R12: return X86::R12D;
case X86::R13: return X86::R13D;
case X86::R14: return X86::R14D;
case X86::R15: return X86::R15D;
case X86::RIP: return X86::EIP;
}
}
void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned RegNo = getReg();
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
RegNo = getGR32FromGR64(RegNo);
Inst.addOperand(MCOperand::CreateReg(RegNo));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
addExpr(Inst, getMemDisp());
Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
}
void addAbsMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
else
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
void addMemOffsOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
else
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
return Res;
}
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
bool AddressOf = false,
SMLoc OffsetOfLoc = SMLoc(),
StringRef SymName = StringRef(),
void *OpDecl = 0) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
return Res;
}
static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
return Res;
}
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
void *OpDecl = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
return Res;
}
/// Create a generalized memory operand.
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0,
StringRef SymName = StringRef(),
void *OpDecl = 0) {
// We should never have just a displacement; that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
// The scale should always be one of {1,2,4,8}.
assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
"Invalid scale!");
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = SegReg;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = BaseReg;
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
return Res;
}
};
} // end anonymous namespace.
bool X86AsmParser::isSrcOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
return (Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
}
bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
return Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
}
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
RegNo = 0;
const AsmToken &PercentTok = Parser.getTok();
StartLoc = PercentTok.getLoc();
// If we encounter a %, ignore it. This code handles registers with and
// without the prefix; unprefixed registers can occur in cfi directives.
if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
Parser.Lex(); // Eat percent token.
const AsmToken &Tok = Parser.getTok();
EndLoc = Tok.getEndLoc();
if (Tok.isNot(AsmToken::Identifier)) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
SMRange(StartLoc, EndLoc));
}
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
RegNo = MatchRegisterName(Tok.getString().lower());
if (!is64BitMode()) {
// FIXME: This should be done using Requires<In32BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
// checked.
// FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// REX prefix.
if (RegNo == X86::RIZ ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo))
return Error(StartLoc, "register %"
+ Tok.getString() + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
if (getLexer().isNot(AsmToken::LParen))
return false;
// Lex the paren.
getParser().Lex();
const AsmToken &IntTok = Parser.getTok();
if (IntTok.isNot(AsmToken::Integer))
return Error(IntTok.getLoc(), "expected stack index");
switch (IntTok.getIntVal()) {
case 0: RegNo = X86::ST0; break;
case 1: RegNo = X86::ST1; break;
case 2: RegNo = X86::ST2; break;
case 3: RegNo = X86::ST3; break;
case 4: RegNo = X86::ST4; break;
case 5: RegNo = X86::ST5; break;
case 6: RegNo = X86::ST6; break;
case 7: RegNo = X86::ST7; break;
default: return Error(IntTok.getLoc(), "invalid stack index");
}
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat ')'
return false;
}
EndLoc = Parser.getTok().getEndLoc();
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
Tok.getString().startswith("db")) {
switch (Tok.getString()[2]) {
case '0': RegNo = X86::DR0; break;
case '1': RegNo = X86::DR1; break;
case '2': RegNo = X86::DR2; break;
case '3': RegNo = X86::DR3; break;
case '4': RegNo = X86::DR4; break;
case '5': RegNo = X86::DR5; break;
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
}
if (RegNo != 0) {
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat it.
return false;
}
}
if (RegNo == 0) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
SMRange(StartLoc, EndLoc));
}
Parser.Lex(); // Eat identifier token.
return false;
}
X86Operand *X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
return ParseATTOperand();
}
/// getIntelMemOperandSize - Return the Intel memory operand size.
static unsigned getIntelMemOperandSize(StringRef OpStr) {
unsigned Size = StringSwitch<unsigned>(OpStr)
.Cases("BYTE", "byte", 8)
.Cases("WORD", "word", 16)
.Cases("DWORD", "dword", 32)
.Cases("QWORD", "qword", 64)
.Cases("XWORD", "xword", 80)
.Cases("XMMWORD", "xmmword", 128)
.Cases("YMMWORD", "ymmword", 256)
.Default(0);
return Size;
}
X86Operand *
X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End,
unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info){
- if (isa<MCSymbolRefExpr>(Disp)) {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (!Info.IsVarDecl) {
- unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
- SMLoc(), Identifier, Info.OpDecl);
- }
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create a register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+ SMLoc(), Identifier, Info.OpDecl);
+ }
+
+ // We either have a direct symbol reference, or an offset from a symbol. The
+ // parser always puts the symbol on the LHS, so look there for size
+ // calculation purposes.
+ const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
+ bool IsSymRef =
+ isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
+ if (IsSymRef) {
if (!Size) {
Size = Info.Type * 8; // Size is in terms of bits in this context.
if (Size)
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
/*Len=*/0, Size));
}
}
// When parsing inline assembly we set the base register to a non-zero value
// if we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
BaseReg = BaseReg ? BaseReg : 1;
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
End, Size, Identifier, Info.OpDecl);
}
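// Rewrite a bracketed inline-asm memory expression in the IR string so that
// only the symbol survives: the brackets themselves are skipped, immediate
// displacements are folded into a single rewrite, and the ImmPrefix markers
// inside the brackets are deleted.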
static void
RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
StringRef SymName, int64_t ImmDisp,
int64_t FinalImmDisp, SMLoc &BracLoc,
SMLoc &StartInBrac, SMLoc &End) {
// Remove the '[' and ']' from the IR string.
AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
// If ImmDisp is non-zero, then we parsed a displacement before the
// bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp ]).
// If ImmDisp doesn't match the displacement computed by the state machine,
// then we have an additional displacement in the bracketed expression.
if (ImmDisp != FinalImmDisp) {
if (ImmDisp) {
// We have an immediate displacement before the bracketed expression.
// Adjust this to match the final immediate displacement.
bool Found = false;
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
E = AsmRewrites->end(); I != E; ++I) {
if ((*I).Loc.getPointer() > BracLoc.getPointer())
continue;
if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
assert (!Found && "ImmDisp already rewritten.");
(*I).Kind = AOK_Imm;
(*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
(*I).Val = FinalImmDisp;
Found = true;
break;
}
}
assert (Found && "Unable to rewrite ImmDisp.");
(void)Found;
} else {
// We have a symbolic and an immediate displacement, but no displacement
// before the bracketed expression. Put the immediate displacement
// before the bracketed expression.
AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
}
}
// Remove all the ImmPrefix rewrites within the brackets.
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
E = AsmRewrites->end(); I != E; ++I) {
if ((*I).Loc.getPointer() < StartInBrac.getPointer())
continue;
if ((*I).Kind == AOK_ImmPrefix)
(*I).Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
// Skip everything before the symbol.
if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
assert(Len > 0 && "Expected a positive length.");
AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
}
// Skip everything after the symbol.
if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
assert(Len > 0 && "Expected a positive length.");
AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
}
}
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
const AsmToken &Tok = Parser.getTok();
bool Done = false;
while (!Done) {
bool UpdateLocLex = true;
// The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
// identifier. Don't try to parse it as a register.
if (Tok.getString().startswith("."))
break;
// If we're parsing an immediate expression, we don't expect a '['.
if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
break;
switch (getLexer().getKind()) {
default: {
if (SM.isValidEndState()) {
Done = true;
break;
}
return Error(Tok.getLoc(), "unknown token in expression");
}
case AsmToken::EndOfStatement: {
Done = true;
break;
}
case AsmToken::Identifier: {
// This could be a register or a symbolic displacement.
unsigned TmpReg;
const MCExpr *Val;
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
if(!ParseRegister(TmpReg, IdentLoc, End)) {
SM.onRegister(TmpReg);
UpdateLocLex = false;
break;
} else {
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
return Error(Tok.getLoc(), "Unexpected identifier!");
} else {
- InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
- if (ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/false, End))
- return true;
+ // This is a dot operator, not an adjacent identifier.
+ if (Identifier.find('.') != StringRef::npos) {
+ return false;
+ } else {
+ InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return true;
+ }
}
SM.onIdentifierExpr(Val, Identifier);
UpdateLocLex = false;
break;
}
return Error(Tok.getLoc(), "Unexpected identifier!");
}
case AsmToken::Integer:
if (isParsingInlineAsm() && SM.getAddImmPrefix())
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
Tok.getLoc()));
SM.onInteger(Tok.getIntVal());
break;
case AsmToken::Plus: SM.onPlus(); break;
case AsmToken::Minus: SM.onMinus(); break;
case AsmToken::Star: SM.onStar(); break;
case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::LBrac: SM.onLBrac(); break;
case AsmToken::RBrac: SM.onRBrac(); break;
case AsmToken::LParen: SM.onLParen(); break;
case AsmToken::RParen: SM.onRParen(); break;
}
if (SM.hadError())
return Error(Tok.getLoc(), "unknown token in expression");
if (!Done && UpdateLocLex) {
End = Tok.getLoc();
Parser.Lex(); // Consume the token.
}
}
return false;
}
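/// ParseIntelBracExpression - Parse the bracketed part of an Intel operand,
/// e.g. the '[ebx + 4*eax + 16]' in 'dword ptr [ebx + 4*eax + 16]'. Any
/// segment override and immediate displacement before the '[' have already
/// been consumed by the caller.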
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
int64_t ImmDisp,
unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(BracLoc, "Expected '[' token!");
Parser.Lex(); // Eat '['
SMLoc StartInBrac = Tok.getLoc();
// Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
// may have already parsed an immediate displacement before the bracketed
// expression.
IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
if (ParseIntelExpression(SM, End))
return 0;
- const MCExpr *Disp;
+ const MCExpr *Disp = 0;
if (const MCExpr *Sym = SM.getSym()) {
// A symbolic displacement.
Disp = Sym;
if (isParsingInlineAsm())
RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
ImmDisp, SM.getImm(), BracLoc, StartInBrac,
End);
- } else {
- // An immediate displacement only.
- Disp = MCConstantExpr::Create(SM.getImm(), getContext());
}
- // Parse the dot operator (e.g., [ebx].foo.bar).
- if (Tok.getString().startswith(".")) {
+ if (SM.getImm() || !Disp) {
+ const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
+ if (Disp)
+ Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
+ else
+ Disp = Imm; // An immediate displacement only.
+ }
+
+ // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
+ // will in fact do a global lookup of the field name inside all global typedefs,
+ // but we don't emulate that.
+ if (Tok.getString().find('.') != StringRef::npos) {
const MCExpr *NewDisp;
if (ParseIntelDotOperator(Disp, NewDisp))
return 0;
End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
}
int BaseReg = SM.getBaseReg();
int IndexReg = SM.getIndexReg();
int Scale = SM.getScale();
if (!isParsingInlineAsm()) {
// handle [-42]
if (!BaseReg && !IndexReg) {
if (!SegReg)
return X86Operand::CreateMem(Disp, Start, End, Size);
else
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
}
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
End, Size);
}
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
End, Size, SM.getSymName(), Info);
}
// Inline assembly may use variable names with namespace alias qualifiers.
bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End) {
assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = 0;
StringRef LineBuf(Identifier.data());
SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
const AsmToken &Tok = Parser.getTok();
// Advance the token stream until the end of the current token is
// after the end of what the frontend claimed.
const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
while (true) {
End = Tok.getEndLoc();
getLexer().Lex();
assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
if (End.getPointer() == EndPtr) break;
}
// Create the symbol reference.
Identifier = LineBuf;
MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
return false;
}
/// \brief Parse an Intel-style segment override.
X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
SMLoc Start,
unsigned Size) {
assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
const AsmToken &Tok = Parser.getTok(); // Eat colon.
if (Tok.isNot(AsmToken::Colon))
return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
Parser.Lex(); // Eat ':'
int64_t ImmDisp = 0;
if (getLexer().is(AsmToken::Integer)) {
ImmDisp = Tok.getIntVal();
AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
if (isParsingInlineAsm())
InstInfo->AsmRewrites->push_back(
AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
if (getLexer().isNot(AsmToken::LBrac)) {
// An immediate following a 'segment register', 'colon' token sequence can
// be followed by a bracketed expression. If it isn't, we know we have our
// final segment override.
const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
/*Scale=*/1, Start, ImmDispToken.getEndLoc(),
Size);
}
}
if (getLexer().is(AsmToken::LBrac))
return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
const MCExpr *Val;
SMLoc End;
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
return X86Operand::CreateMem(Val, Start, End, Size);
}
InlineAsmIdentifierInfo Info;
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
return 0;
return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
/// ParseIntelMemOperand - Parse an Intel-style memory operand.
X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
if (getLexer().is(AsmToken::LBrac))
return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
const MCExpr *Val;
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
return X86Operand::CreateMem(Val, Start, End, Size);
}
InlineAsmIdentifierInfo Info;
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
return 0;
return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
/// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
const MCExpr *&NewDisp) {
const AsmToken &Tok = Parser.getTok();
int64_t OrigDispVal, DotDispVal;
// FIXME: Handle non-constant expressions.
if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
OrigDispVal = OrigDisp->getValue();
else
return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
- // Drop the '.'.
- StringRef DotDispStr = Tok.getString().drop_front(1);
+ // Drop the optional '.'.
+ StringRef DotDispStr = Tok.getString();
+ if (DotDispStr.startswith("."))
+ DotDispStr = DotDispStr.drop_front(1);
// .Imm gets lexed as a real.
if (Tok.is(AsmToken::Real)) {
APInt DotDisp;
DotDispStr.getAsInteger(10, DotDisp);
DotDispVal = DotDisp.getZExtValue();
} else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
unsigned DotDisp;
std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
DotDisp))
return Error(Tok.getLoc(), "Unable to lookup field reference!");
DotDispVal = DotDisp;
} else
return Error(Tok.getLoc(), "Unexpected token type!");
if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
unsigned Len = DotDispStr.size();
unsigned Val = OrigDispVal + DotDispVal;
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
Val));
}
NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
return false;
}
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather than the content of a variable.
X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
const AsmToken &Tok = Parser.getTok();
SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
const MCExpr *Val;
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
return 0;
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
// The offset operator will have an 'r' constraint, thus we need to create a
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
OffsetOfLoc, Identifier, Info.OpDecl);
}
enum IntelOperatorKind {
IOK_LENGTH,
IOK_SIZE,
IOK_TYPE
};
/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
/// returns the number of elements in an array. It returns the value 1 for
/// non-array variables. The SIZE operator returns the size of a C or C++
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
Parser.Lex(); // Eat operator.
const MCExpr *Val = 0;
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/true, End))
return 0;
if (!Info.OpDecl)
return ErrorOperand(Start, "unable to lookup expression");
unsigned CVal = 0;
switch(OpKind) {
default: llvm_unreachable("Unexpected operand kind!");
case IOK_LENGTH: CVal = Info.Length; break;
case IOK_SIZE: CVal = Info.Size; break;
case IOK_TYPE: CVal = Info.Type; break;
}
// Rewrite the type operator and the C or C++ type or variable in terms of an
// immediate. E.g. TYPE foo -> $$4
unsigned Len = End.getPointer() - TypeLoc.getPointer();
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
return X86Operand::CreateImm(Imm, Start, End);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
// Offset, length, type and size operators.
if (isParsingInlineAsm()) {
StringRef AsmTokStr = Tok.getString();
if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
return ParseIntelOffsetOfOperator();
if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
return ParseIntelOperator(IOK_LENGTH);
if (AsmTokStr == "size" || AsmTokStr == "SIZE")
return ParseIntelOperator(IOK_SIZE);
if (AsmTokStr == "type" || AsmTokStr == "TYPE")
return ParseIntelOperator(IOK_TYPE);
}
unsigned Size = getIntelMemOperandSize(Tok.getString());
if (Size) {
Parser.Lex(); // Eat operand size (e.g., byte, word).
if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
Parser.Lex(); // Eat ptr.
}
Start = Tok.getLoc();
// Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
getLexer().is(AsmToken::LParen)) {
AsmToken StartTok = Tok;
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
if (ParseIntelExpression(SM, End))
return 0;
int64_t Imm = SM.getImm();
if (isParsingInlineAsm()) {
unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
if (StartTok.getString().size() == Len)
// Just add a prefix if this wasn't a complex immediate expression.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
else
// Otherwise, rewrite the complex expression as a single immediate.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
}
if (getLexer().isNot(AsmToken::LBrac)) {
const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
return X86Operand::CreateImm(ImmExpr, Start, End);
}
// Only positive immediates are valid.
if (Imm < 0)
return ErrorOperand(Start, "expected a positive immediate displacement "
"before bracketed expr.");
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
return ParseIntelMemOperand(Imm, Start, Size);
}
// Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
// of a segment override, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
}
// Memory operand.
return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
}
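/// ParseATTOperand - Dispatch on the first token of an AT&T operand: '%'
/// starts a register, '$' an immediate, and anything else is treated as the
/// start of a memory operand.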
X86Operand *X86AsmParser::ParseATTOperand() {
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
return ParseMemOperand(0, Parser.getTok().getLoc());
case AsmToken::Percent: {
// Read the register.
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
Error(Start, "%eiz and %riz can only be used as index registers",
SMRange(Start, End));
return 0;
}
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
case AsmToken::Dollar: {
// $42 -> immediate.
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
if (getParser().parseExpression(Val, End))
return 0;
return X86Operand::CreateImm(Val, Start, End);
}
}
}
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
// after it.
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().parseExpression(Disp, ExprEnd)) return 0;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
// Unless we have a segment register, treat this as an immediate.
if (SegReg == 0)
return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
// Eat the '('.
Parser.Lex();
} else {
// Okay, we have a '('. We don't know if this is an expression or not, so
// we have to eat the '(' to see beyond it.
SMLoc LParenLoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat the '('.
if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
// Nothing to do here, fall into the code below with the '(' part of the
// memory operand consumed.
} else {
SMLoc ExprEnd;
// It must be a parenthesized expression; parse it now.
if (getParser().parseParenExpression(Disp, ExprEnd))
return 0;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
// Unless we have a segment register, treat this as an immediate.
if (SegReg == 0)
return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
// Eat the '('.
Parser.Lex();
}
}
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
SMLoc IndexLoc;
if (getLexer().is(AsmToken::Percent)) {
SMLoc StartLoc, EndLoc;
if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
Error(StartLoc, "eiz and riz can only be used as index registers",
SMRange(StartLoc, EndLoc));
return 0;
}
}
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
IndexLoc = Parser.getTok().getLoc();
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
// correctly.
//
// Note that even though it would be completely consistent to support syntax
// like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(IndexReg, L, L)) return 0;
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
// ::= ',' [scale-expression]
if (getLexer().isNot(AsmToken::Comma)) {
Error(Parser.getTok().getLoc(),
"expected comma in scale expression");
return 0;
}
Parser.Lex(); // Eat the comma.
if (getLexer().isNot(AsmToken::RParen)) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
if (getParser().parseAbsoluteExpression(ScaleVal)){
Error(Loc, "expected scale expression");
return 0;
}
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
return 0;
}
Scale = (unsigned)ScaleVal;
}
}
} else if (getLexer().isNot(AsmToken::RParen)) {
// A scale amount without an index register: parse it for error recovery,
// but warn below that it is ignored.
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
if (getParser().parseAbsoluteExpression(Value))
return 0;
if (Value != 1)
Warning(Loc, "scale factor without index register is ignored");
Scale = 1;
}
}
// Ok, we've eaten the memory operand; verify we have a ')' and eat it too.
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
return 0;
}
SMLoc MemEnd = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ')'.
// If we have both a base register and an index register make sure they are
// both 64-bit or 32-bit registers.
// To support VSIB, IndexReg can be 128-bit or 256-bit registers.
if (BaseReg != 0 && IndexReg != 0) {
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
(X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
IndexReg != X86::RIZ) {
Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
return 0;
}
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
(X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
IndexReg != X86::EIZ){
Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
return 0;
}
}
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
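// A few worked examples for ParseMemOperand above, written informally as
// (SegReg, Disp, BaseReg, IndexReg, Scale); illustrative only:
//   8(%ebx)        -> (0,   8, EBX, 0,   1)
//   (%eax,%ecx,4)  -> (0,   0, EAX, ECX, 4)
//   %gs:16         -> (GS, 16, 0,   0,   1)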
bool X86AsmParser::
ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
InstInfo = &Info;
StringRef PatchedName = Name;
// FIXME: Hack to recognize setneb as setne.
if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
.Case("eq", 0x00)
.Case("lt", 0x01)
.Case("le", 0x02)
.Case("unord", 0x03)
.Case("neq", 0x04)
.Case("nlt", 0x05)
.Case("nle", 0x06)
.Case("ord", 0x07)
/* AVX only from here */
.Case("eq_uq", 0x08)
.Case("nge", 0x09)
.Case("ngt", 0x0A)
.Case("false", 0x0B)
.Case("neq_oq", 0x0C)
.Case("ge", 0x0D)
.Case("gt", 0x0E)
.Case("true", 0x0F)
.Case("eq_os", 0x10)
.Case("lt_oq", 0x11)
.Case("le_oq", 0x12)
.Case("unord_s", 0x13)
.Case("neq_us", 0x14)
.Case("nlt_uq", 0x15)
.Case("nle_uq", 0x16)
.Case("ord_s", 0x17)
.Case("eq_us", 0x18)
.Case("nge_uq", 0x19)
.Case("ngt_uq", 0x1A)
.Case("false_os", 0x1B)
.Case("neq_os", 0x1C)
.Case("ge_oq", 0x1D)
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
PatchedName = IsVCMP ? "vcmpss" : "cmpss";
} else if (PatchedName.endswith("sd")) {
PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
} else if (PatchedName.endswith("ps")) {
PatchedName = IsVCMP ? "vcmpps" : "cmpps";
} else {
assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
PatchedName = IsVCMP ? "vcmppd" : "cmppd";
}
}
}
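// For example (illustrative): "cmpless %xmm1, %xmm0" is rewritten here to
// the mnemonic "cmpss" with ExtraImmOp == 0x02 ("le"), and
// "vcmpngt_uqpd" to "vcmppd" with ExtraImmOp == 0x1A.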
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp && !isParsingIntelSyntax())
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
// Determine whether this is an instruction prefix.
bool isPrefix =
Name == "lock" || Name == "rep" ||
Name == "repe" || Name == "repz" ||
Name == "repne" || Name == "repnz" ||
Name == "rex64" || Name == "data16";
// This does the actual operand parsing. Don't parse any more if we have a
// prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
// just want to parse the "lock" as the first instruction and the "incl" as
// the next one.
if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star)) {
SMLoc Loc = Parser.getTok().getLoc();
Operands.push_back(X86Operand::CreateToken("*", Loc));
Parser.Lex(); // Eat the star.
}
// Read the first operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
Parser.eatToEndOfStatement();
return true;
}
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
Parser.eatToEndOfStatement();
return true;
}
}
if (STI.getFeatureBits() & X86::FeatureAVX512) {
// Parse mask register {%k1}
if (getLexer().is(AsmToken::LCurly)) {
SMLoc Loc = Parser.getTok().getLoc();
Operands.push_back(X86Operand::CreateToken("{", Loc));
Parser.Lex(); // Eat the {
if (X86Operand *Op = ParseOperand()) {
Operands.push_back(Op);
if (!getLexer().is(AsmToken::RCurly)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "Expected } at this point");
}
Loc = Parser.getTok().getLoc();
Operands.push_back(X86Operand::CreateToken("}", Loc));
Parser.Lex(); // Eat the }
} else {
Parser.eatToEndOfStatement();
return true;
}
}
// Parse "zeroing non-masked" semantic {z}
if (getLexer().is(AsmToken::LCurly)) {
SMLoc Loc = Parser.getTok().getLoc();
Operands.push_back(X86Operand::CreateToken("{z}", Loc));
Parser.Lex(); // Eat the {
if (!getLexer().is(AsmToken::Identifier) || getLexer().getTok().getIdentifier() != "z") {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "Expected z at this point");
}
Parser.Lex(); // Eat the z
if (!getLexer().is(AsmToken::RCurly)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "Expected } at this point");
}
Parser.Lex(); // Eat the }
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
if (getLexer().is(AsmToken::EndOfStatement))
Parser.Lex(); // Consume the EndOfStatement
else if (isPrefix && getLexer().is(AsmToken::Slash))
Parser.Lex(); // Consume the prefix separator Slash
if (ExtraImmOp && isParsingIntelSyntax())
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
Operands.size() == 3) {
X86Operand &Op = *(X86Operand*)Operands.back();
if (Op.isMem() && Op.Mem.SegReg == 0 &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
SMLoc Loc = Op.getEndLoc();
Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
delete &Op;
}
}
// Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
Operands.size() == 3) {
X86Operand &Op = *(X86Operand*)Operands.begin()[1];
if (Op.isMem() && Op.Mem.SegReg == 0 &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
SMLoc Loc = Op.getEndLoc();
Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
delete &Op;
}
}
// Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
if (Name.startswith("ins") && Operands.size() == 3 &&
(Name == "insb" || Name == "insw" || Name == "insl")) {
X86Operand &Op = *(X86Operand*)Operands.begin()[1];
X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
Operands.pop_back();
Operands.pop_back();
delete &Op;
delete &Op2;
}
}
// Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
if (Name.startswith("outs") && Operands.size() == 3 &&
(Name == "outsb" || Name == "outsw" || Name == "outsl")) {
X86Operand &Op = *(X86Operand*)Operands.begin()[1];
X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
Operands.pop_back();
Operands.pop_back();
delete &Op;
delete &Op2;
}
}
// Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
if (Name.startswith("movs") && Operands.size() == 3 &&
(Name == "movsb" || Name == "movsw" || Name == "movsl" ||
(is64BitMode() && Name == "movsq"))) {
X86Operand &Op = *(X86Operand*)Operands.begin()[1];
X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
if (isSrcOp(Op) && isDstOp(Op2)) {
Operands.pop_back();
Operands.pop_back();
delete &Op;
delete &Op2;
}
}
// Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
if (Name.startswith("lods") && Operands.size() == 3 &&
(Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
if (isSrcOp(*Op1) && Op2->isReg()) {
const char *ins;
unsigned reg = Op2->getReg();
bool isLods = Name == "lods";
if (reg == X86::AL && (isLods || Name == "lodsb"))
ins = "lodsb";
else if (reg == X86::AX && (isLods || Name == "lodsw"))
ins = "lodsw";
else if (reg == X86::EAX && (isLods || Name == "lodsl"))
ins = "lodsl";
else if (reg == X86::RAX && (isLods || Name == "lodsq"))
ins = "lodsq";
else
ins = NULL;
if (ins != NULL) {
Operands.pop_back();
Operands.pop_back();
delete Op1;
delete Op2;
if (Name != ins)
static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
}
}
}
// Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
if (Name.startswith("stos") && Operands.size() == 3 &&
(Name == "stos" || Name == "stosb" || Name == "stosw" ||
Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
if (isDstOp(*Op2) && Op1->isReg()) {
const char *ins;
unsigned reg = Op1->getReg();
bool isStos = Name == "stos";
if (reg == X86::AL && (isStos || Name == "stosb"))
ins = "stosb";
else if (reg == X86::AX && (isStos || Name == "stosw"))
ins = "stosw";
else if (reg == X86::EAX && (isStos || Name == "stosl"))
ins = "stosl";
else if (reg == X86::RAX && (isStos || Name == "stosq"))
ins = "stosq";
else
ins = NULL;
if (ins != NULL) {
Operands.pop_back();
Operands.pop_back();
delete Op1;
delete Op2;
if (Name != ins)
static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
}
}
}
// FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
if ((Name.startswith("shr") || Name.startswith("sar") ||
Name.startswith("shl") || Name.startswith("sal") ||
Name.startswith("rcl") || Name.startswith("rcr") ||
Name.startswith("rol") || Name.startswith("ror")) &&
Operands.size() == 3) {
if (isParsingIntelSyntax()) {
// Intel syntax
X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
delete Operands[2];
Operands.pop_back();
}
} else {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
delete Operands[1];
Operands.erase(Operands.begin() + 1);
}
}
}
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// InstAlias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
delete Operands[1];
Operands.erase(Operands.begin() + 1);
static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
}
}
return false;
}
static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
bool isCmp) {
MCInst TmpInst;
TmpInst.setOpcode(Opcode);
if (!isCmp)
TmpInst.addOperand(MCOperand::CreateReg(Reg));
TmpInst.addOperand(MCOperand::CreateReg(Reg));
TmpInst.addOperand(Inst.getOperand(0));
Inst = TmpInst;
return true;
}
static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
bool isCmp = false) {
if (!Inst.getOperand(0).isImm() ||
!isImmSExti16i8Value(Inst.getOperand(0).getImm()))
return false;
return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
}
static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
bool isCmp = false) {
if (!Inst.getOperand(0).isImm() ||
!isImmSExti32i8Value(Inst.getOperand(0).getImm()))
return false;
return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
}
static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
bool isCmp = false) {
if (!Inst.getOperand(0).isImm() ||
!isImmSExti64i8Value(Inst.getOperand(0).getImm()))
return false;
return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
}
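// Example of the shrinking these helpers perform (illustrative): the
// matcher may first select AND16i16 for "andw $4, %ax"; since 4 fits in a
// sign-extended 8-bit immediate, processInstruction() below rewrites it to
// AND16ri8, which has a shorter encoding.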
bool X86AsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
switch (Inst.getOpcode()) {
default: return false;
case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
case X86::VMOVAPSrr:
case X86::VMOVAPSYrr:
case X86::VMOVDQArr:
case X86::VMOVDQAYrr:
case X86::VMOVDQUrr:
case X86::VMOVDQUYrr:
case X86::VMOVUPDrr:
case X86::VMOVUPDYrr:
case X86::VMOVUPSrr:
case X86::VMOVUPSYrr: {
if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
!X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
return false;
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
}
Inst.setOpcode(NewOpc);
return true;
}
case X86::VMOVSDrr:
case X86::VMOVSSrr: {
if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
!X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
return false;
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
}
Inst.setOpcode(NewOpc);
return true;
}
}
}
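// Illustrative example of the VEX move special case above: in
// "vmovaps %xmm8, %xmm0" only the source needs an extended encoding, so
// the opcode is flipped to VMOVAPSrr_REV; that moves the extended register
// into a field the shorter two-byte VEX prefix can still reach (a sketch
// of the rationale, not a normative statement about the encoder).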
static const char *getSubtargetFeatureName(unsigned Val);
bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpected empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = None;
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
Op->getToken() == "finit" || Op->getToken() == "fsave" ||
Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
const char *Repl =
StringSwitch<const char*>(Op->getToken())
.Case("finit", "fninit")
.Case("fsave", "fnsave")
.Case("fstcw", "fnstcw")
.Case("fstcww", "fnstcw")
.Case("fstenv", "fnstenv")
.Case("fstsw", "fnstsw")
.Case("fstsww", "fnstsw")
.Case("fclex", "fnclex")
.Default(0);
assert(Repl && "Unknown wait-prefixed instruction");
delete Operands[0];
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
bool WasOriginallyInvalidOperand = false;
MCInst Inst;
// First, try a direct match.
switch (MatchInstructionImpl(Operands, Inst,
ErrorInfo, MatchingInlineAsm,
isParsingIntelSyntax())) {
default: break;
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
if (!MatchingInlineAsm)
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
// Special case the error message for the very common case where only
// a single subtarget feature is missing.
std::string Msg = "instruction requires:";
unsigned Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask) {
Msg += " ";
Msg += getSubtargetFeatureName(ErrorInfo & Mask);
}
Mask <<= 1;
}
return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
}
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
case Match_MnemonicFail:
break;
}
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
// type. However, that requires substantially more matcher support than the
// following hack.
// Change the operand to point to a temporary token.
StringRef Base = Op->getToken();
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
Op->setTokenValue(Tmp.str());
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
// 80-bit floating point, which use the suffixes s,l,t respectively.
//
// Otherwise, we assume that this may be an integer instruction, which comes
// in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
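// e.g. (illustrative): for "fadd" the candidates tried below are "fadds",
// "faddl" and "faddt" (the fourth slot is the embedded NUL), while for
// "mov" they are "movb", "movw", "movl" and "movq".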
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
MatchingInlineAsm, isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match1 == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
MatchingInlineAsm, isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match2 == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[2];
Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
MatchingInlineAsm, isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match3 == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[3];
Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
MatchingInlineAsm, isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match4 == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
// Restore the old token.
Op->setTokenValue(Base);
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
unsigned NumSuccessfulMatches =
(Match1 == Match_Success) + (Match2 == Match_Success) +
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
}
// Otherwise, the match failed, try to produce a decent error message.
// If we had multiple suffix matches, then identify this as an ambiguous
// match.
if (NumSuccessfulMatches > 1) {
char MatchChars[4];
unsigned NumMatches = 0;
if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
OS << "ambiguous instructions require an explicit suffix (could be ";
for (unsigned i = 0; i != NumMatches; ++i) {
if (i != 0)
OS << ", ";
if (i + 1 == NumMatches)
OS << "or ";
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
return true;
}
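// The resulting diagnostic looks like this (illustrative, 64-bit mode):
//   error: ambiguous instructions require an explicit suffix
//   (could be 'addb', 'addw', 'addl', or 'addq')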
// Okay, we know that none of the variants matched successfully.
// If all of the instructions reported an invalid mnemonic, then the original
// mnemonic was invalid.
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
Op->getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
Ranges, MatchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
if (ErrorInfo != ~0U) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction",
EmptyRanges, MatchingInlineAsm);
X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
OperandRange, MatchingInlineAsm);
}
}
return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
std::string Msg = "instruction requires:";
unsigned Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
if (ErrorInfoMissingFeature & Mask) {
Msg += " ";
Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
}
Mask <<= 1;
}
return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
(Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
Error(IDLoc, "invalid operand for instruction", EmptyRanges,
MatchingInlineAsm);
return true;
}
// If all of these were an outright failure, report it in a useless way.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
EmptyRanges, MatchingInlineAsm);
return true;
}
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
getParser().setAssemblerDialect(0);
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if(Parser.getTok().getString() == "noprefix") {
// FIXME : Handle noprefix
Parser.Lex();
} else
return true;
}
return false;
}
return true;
}
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
Parser.Lex();
}
}
Parser.Lex();
return false;
}
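// e.g. (illustrative): ".word 1, 2, 3" runs the loop above three times and
// emits three 2-byte values.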
/// ParseDirectiveCode
/// ::= .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
if (IDVal == ".code32") {
Parser.Lex();
if (is64BitMode()) {
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
}
} else if (IDVal == ".code64") {
Parser.Lex();
if (!is64BitMode()) {
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
}
} else {
return Error(L, "unexpected directive " + IDVal);
}
return false;
}
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"
Index: head/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
===================================================================
--- head/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (revision 265924)
+++ head/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (revision 265925)
@@ -1,1749 +1,1748 @@
/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===*
*
* This file is part of the X86 Disassembler.
* It contains the implementation of the instruction decoder.
* Documentation for the disassembler can be found in X86Disassembler.h.
*
*===----------------------------------------------------------------------===*/
#include <stdarg.h> /* for va_*() */
#include <stdio.h> /* for vsnprintf() */
#include <stdlib.h> /* for exit() */
#include <string.h> /* for memset() */
#include "X86DisassemblerDecoder.h"
#include "X86GenDisassemblerTables.inc"
#define TRUE 1
#define FALSE 0
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
/*
* contextForAttrs - Client for the instruction context table. Takes a set of
* attributes and returns the appropriate decode context.
*
* @param attrMask - Attributes, from the enumeration attributeBits.
* @return - The InstructionContext to use when looking up an
* instruction with these attributes.
*/
static InstructionContext contextForAttrs(uint8_t attrMask) {
return CONTEXTS_SYM[attrMask];
}
/*
* modRMRequired - Reads the appropriate instruction table to determine whether
* the ModR/M byte is required to decode a particular instruction.
*
* @param type - The opcode type (i.e., how many bytes it has).
* @param insnContext - The context for the instruction, as returned by
* contextForAttrs.
* @param opcode - The last byte of the instruction's opcode, not counting
* ModR/M extensions and escapes.
* @return - TRUE if the ModR/M byte is required, FALSE otherwise.
*/
static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode) {
const struct ContextDecision* decision = 0;
switch (type) {
case ONEBYTE:
decision = &ONEBYTE_SYM;
break;
case TWOBYTE:
decision = &TWOBYTE_SYM;
break;
case THREEBYTE_38:
decision = &THREEBYTE38_SYM;
break;
case THREEBYTE_3A:
decision = &THREEBYTE3A_SYM;
break;
case THREEBYTE_A6:
decision = &THREEBYTEA6_SYM;
break;
case THREEBYTE_A7:
decision = &THREEBYTEA7_SYM;
break;
case XOP8_MAP:
decision = &XOP8_MAP_SYM;
break;
case XOP9_MAP:
decision = &XOP9_MAP_SYM;
break;
case XOPA_MAP:
decision = &XOPA_MAP_SYM;
break;
}
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
modrm_type != MODRM_ONEENTRY;
}
/*
* decode - Reads the appropriate instruction table to obtain the unique ID of
* an instruction.
*
* @param type - See modRMRequired().
* @param insnContext - See modRMRequired().
* @param opcode - See modRMRequired().
* @param modRM - The ModR/M byte if required, or any value if not.
* @return - The UID of the instruction, or 0 on failure.
*/
static InstrUID decode(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
const struct ModRMDecision* dec = 0;
switch (type) {
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case TWOBYTE:
dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case THREEBYTE_38:
dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case THREEBYTE_3A:
dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case THREEBYTE_A6:
dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case THREEBYTE_A7:
dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case XOP8_MAP:
dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case XOP9_MAP:
dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case XOPA_MAP:
dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
}
switch (dec->modrm_type) {
default:
debug("Corrupt table! Unknown modrm_type");
return 0;
case MODRM_ONEENTRY:
return modRMTable[dec->instructionIDs];
case MODRM_SPLITRM:
if (modFromModRM(modRM) == 0x3)
return modRMTable[dec->instructionIDs+1];
return modRMTable[dec->instructionIDs];
case MODRM_SPLITREG:
if (modFromModRM(modRM) == 0x3)
return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
case MODRM_SPLITMISC:
if (modFromModRM(modRM) == 0x3)
return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
case MODRM_FULL:
return modRMTable[dec->instructionIDs+modRM];
}
}
/*
* specifierForUID - Given a UID, returns the name and operand specification for
* that instruction.
*
* @param uid - The unique ID for the instruction. This should be returned by
* decode(); specifierForUID will not check bounds.
* @return - A pointer to the specification for that instruction.
*/
static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
return &INSTRUCTIONS_SYM[uid];
}
/*
* consumeByte - Uses the reader function provided by the user to consume one
* byte from the instruction's memory and advance the cursor.
*
* @param insn - The instruction with the reader function to use. The cursor
* for this instruction is advanced.
* @param byte - A pointer to a pre-allocated memory buffer to be populated
* with the data read.
* @return - 0 if the read was successful; nonzero otherwise.
*/
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
if (!ret)
++(insn->readerCursor);
return ret;
}
/*
* lookAtByte - Like consumeByte, but does not advance the cursor.
*
* @param insn - See consumeByte().
* @param byte - See consumeByte().
* @return - See consumeByte().
*/
static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
return insn->reader(insn->readerArg, byte, insn->readerCursor);
}
static void unconsumeByte(struct InternalInstruction* insn) {
insn->readerCursor--;
}
#define CONSUME_FUNC(name, type) \
static int name(struct InternalInstruction* insn, type* ptr) { \
type combined = 0; \
unsigned offset; \
for (offset = 0; offset < sizeof(type); ++offset) { \
uint8_t byte; \
int ret = insn->reader(insn->readerArg, \
&byte, \
insn->readerCursor + offset); \
if (ret) \
return ret; \
combined = combined | ((uint64_t)byte << (offset * 8)); \
} \
*ptr = combined; \
insn->readerCursor += sizeof(type); \
return 0; \
}
/*
* consume* - Use the reader function provided by the user to consume data
* values of various sizes from the instruction's memory and advance the
* cursor appropriately. These readers perform endian conversion.
*
* @param insn - See consumeByte().
* @param ptr - A pointer to a pre-allocated memory of appropriate size to
* be populated with the data read.
* @return - See consumeByte().
*/
CONSUME_FUNC(consumeInt8, int8_t)
CONSUME_FUNC(consumeInt16, int16_t)
CONSUME_FUNC(consumeInt32, int32_t)
CONSUME_FUNC(consumeUInt16, uint16_t)
CONSUME_FUNC(consumeUInt32, uint32_t)
CONSUME_FUNC(consumeUInt64, uint64_t)
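/*
 * Example (illustrative) for the consume* readers above: with the cursor
 * at bytes 34 12, consumeUInt16() assembles them least-significant-first
 * into 0x1234 and advances the cursor by two.
 */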
/*
* dbgprintf - Uses the logging function provided by the user to log a single
* message, typically without a carriage-return.
*
* @param insn - The instruction containing the logging function.
* @param format - See printf().
* @param ... - See printf().
*/
static void dbgprintf(struct InternalInstruction* insn,
const char* format,
...) {
char buffer[256];
va_list ap;
if (!insn->dlog)
return;
va_start(ap, format);
(void)vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
insn->dlog(insn->dlogArg, buffer);
return;
}
/*
* setPrefixPresent - Marks that a particular prefix is present at a particular
* location.
*
* @param insn - The instruction to be marked as having the prefix.
* @param prefix - The prefix that is present.
* @param location - The location where the prefix is located (in the address
* space of the instruction's reader).
*/
static void setPrefixPresent(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
insn->prefixPresent[prefix] = 1;
insn->prefixLocations[prefix] = location;
}
/*
* isPrefixAtLocation - Queries an instruction to determine whether a prefix is
* present at a given location.
*
* @param insn - The instruction to be queried.
* @param prefix - The prefix.
* @param location - The location to query.
* @return - Whether the prefix is at that location.
*/
static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
if (insn->prefixPresent[prefix] == 1 &&
insn->prefixLocations[prefix] == location)
return TRUE;
else
return FALSE;
}
/*
* readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
* instruction as having them. Also sets the instruction's default operand,
* address, and other relevant data sizes to report operands correctly.
*
* @param insn - The instruction whose prefixes are to be read.
* @return - 0 if the instruction could be read until the end of the prefix
* bytes, and no prefixes conflicted; nonzero otherwise.
*/
static int readPrefixes(struct InternalInstruction* insn) {
BOOL isPrefix = TRUE;
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
uint8_t byte = 0;
uint8_t nextByte;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
dbgprintf(insn, "readPrefixes()");
while (isPrefix) {
prefixLocation = insn->readerCursor;
/* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
if (consumeByte(insn, &byte))
break;
/*
* If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
* break and let it be disassembled as a normal "instruction".
*/
if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
break;
if (insn->readerCursor - 1 == insn->startLocation
&& (byte == 0xf2 || byte == 0xf3)
&& !lookAtByte(insn, &nextByte))
{
/*
* If the byte is 0xf2 or 0xf3, and any of the following conditions are
* met:
* - it is followed by a LOCK (0xf0) prefix
* - it is followed by an xchg instruction
* then it should be disassembled as an xacquire/xrelease, not repne/rep.
*/
if ((byte == 0xf2 || byte == 0xf3) &&
((nextByte == 0xf0) ||
((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
insn->xAcquireRelease = TRUE;
/*
* Also if the byte is 0xf3, and the following condition is met:
* - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
* "mov mem, imm" (opcode 0xc6/0xc7) instructions.
* then it should be disassembled as an xrelease not rep.
*/
if (byte == 0xf3 &&
(nextByte == 0x88 || nextByte == 0x89 ||
nextByte == 0xc6 || nextByte == 0xc7))
insn->xAcquireRelease = TRUE;
if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
if (consumeByte(insn, &nextByte))
return -1;
if (lookAtByte(insn, &nextByte))
return -1;
unconsumeByte(insn);
}
if (nextByte != 0x0f && nextByte != 0x90)
break;
}
switch (byte) {
case 0xf0: /* LOCK */
case 0xf2: /* REPNE/REPNZ */
case 0xf3: /* REP or REPE/REPZ */
if (prefixGroups[0])
dbgprintf(insn, "Redundant Group 1 prefix");
prefixGroups[0] = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x2e: /* CS segment override -OR- Branch not taken */
case 0x36: /* SS segment override -OR- Branch taken */
case 0x3e: /* DS segment override */
case 0x26: /* ES segment override */
case 0x64: /* FS segment override */
case 0x65: /* GS segment override */
switch (byte) {
case 0x2e:
insn->segmentOverride = SEG_OVERRIDE_CS;
break;
case 0x36:
insn->segmentOverride = SEG_OVERRIDE_SS;
break;
case 0x3e:
insn->segmentOverride = SEG_OVERRIDE_DS;
break;
case 0x26:
insn->segmentOverride = SEG_OVERRIDE_ES;
break;
case 0x64:
insn->segmentOverride = SEG_OVERRIDE_FS;
break;
case 0x65:
insn->segmentOverride = SEG_OVERRIDE_GS;
break;
default:
debug("Unhandled override");
return -1;
}
if (prefixGroups[1])
dbgprintf(insn, "Redundant Group 2 prefix");
prefixGroups[1] = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x66: /* Operand-size override */
if (prefixGroups[2])
dbgprintf(insn, "Redundant Group 3 prefix");
prefixGroups[2] = TRUE;
hasOpSize = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x67: /* Address-size override */
if (prefixGroups[3])
dbgprintf(insn, "Redundant Group 4 prefix");
prefixGroups[3] = TRUE;
hasAdSize = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
default: /* Not a prefix byte */
isPrefix = FALSE;
break;
}
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
insn->vexXopType = TYPE_NO_VEX_XOP;
if (byte == 0xc4) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexXopType = TYPE_VEX_3B;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
if (insn->vexXopType == TYPE_VEX_3B) {
insn->vexXopPrefix[0] = byte;
consumeByte(insn, &insn->vexXopPrefix[1]);
consumeByte(insn, &insn->vexXopPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
| (wFromVEX3of3(insn->vexXopPrefix[2]) << 3)
| (rFromVEX2of3(insn->vexXopPrefix[1]) << 2)
| (xFromVEX2of3(insn->vexXopPrefix[1]) << 1)
| (bFromVEX2of3(insn->vexXopPrefix[1]) << 0);
}
switch (ppFromVEX3of3(insn->vexXopPrefix[2]))
{
default:
break;
case VEX_PREFIX_66:
hasOpSize = TRUE;
break;
}
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
insn->vexXopPrefix[0], insn->vexXopPrefix[1],
insn->vexXopPrefix[2]);
}
}
else if (byte == 0xc5) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexXopType = TYPE_VEX_2B;
}
else {
unconsumeByte(insn);
}
if (insn->vexXopType == TYPE_VEX_2B) {
insn->vexXopPrefix[0] = byte;
consumeByte(insn, &insn->vexXopPrefix[1]);
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
| (rFromVEX2of2(insn->vexXopPrefix[1]) << 2);
}
switch (ppFromVEX2of2(insn->vexXopPrefix[1]))
{
default:
break;
case VEX_PREFIX_66:
hasOpSize = TRUE;
break;
}
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]);
}
}
else if (byte == 0x8f) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of XOP");
return -1;
}
if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
insn->vexXopType = TYPE_XOP;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
if (insn->vexXopType == TYPE_XOP) {
insn->vexXopPrefix[0] = byte;
consumeByte(insn, &insn->vexXopPrefix[1]);
consumeByte(insn, &insn->vexXopPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
| (wFromXOP3of3(insn->vexXopPrefix[2]) << 3)
| (rFromXOP2of3(insn->vexXopPrefix[1]) << 2)
| (xFromXOP2of3(insn->vexXopPrefix[1]) << 1)
| (bFromXOP2of3(insn->vexXopPrefix[1]) << 0);
}
switch (ppFromXOP3of3(insn->vexXopPrefix[2]))
{
default:
break;
case VEX_PREFIX_66:
hasOpSize = TRUE;
break;
}
dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
insn->vexXopPrefix[0], insn->vexXopPrefix[1],
insn->vexXopPrefix[2]);
}
}
else {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
insn->rexPrefix = byte;
insn->necessaryPrefixLocation = insn->readerCursor - 2;
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
} else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
} else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
}
if (insn->mode == MODE_16BIT) {
insn->registerSize = (hasOpSize ? 4 : 2);
insn->addressSize = (hasAdSize ? 4 : 2);
insn->displacementSize = (hasAdSize ? 4 : 2);
insn->immediateSize = (hasOpSize ? 4 : 2);
} else if (insn->mode == MODE_32BIT) {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 2 : 4);
insn->displacementSize = (hasAdSize ? 2 : 4);
insn->immediateSize = (hasOpSize ? 2 : 4);
} else if (insn->mode == MODE_64BIT) {
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
insn->registerSize = 8;
insn->addressSize = (hasAdSize ? 4 : 8);
insn->displacementSize = 4;
insn->immediateSize = 4;
} else if (insn->rexPrefix) {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 4 : 8);
insn->displacementSize = (hasOpSize ? 2 : 4);
insn->immediateSize = (hasOpSize ? 2 : 4);
} else {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 4 : 8);
insn->displacementSize = (hasOpSize ? 2 : 4);
insn->immediateSize = (hasOpSize ? 2 : 4);
}
}
return 0;
}
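/*
 * Worked example (illustrative): in MODE_32BIT the bytes 66 89 c8
 * ("mov %cx, %ax") set hasOpSize, so registerSize and immediateSize drop
 * from 4 to 2 above while addressSize stays 4.
 */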
/*
* readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
* extended or escape opcodes).
*
* @param insn - The instruction whose opcode is to be read.
* @return - 0 if the opcode could be read successfully; nonzero otherwise.
*/
static int readOpcode(struct InternalInstruction* insn) {
/* Determine the length of the primary opcode */
uint8_t current;
dbgprintf(insn, "readOpcode()");
insn->opcodeType = ONEBYTE;
if (insn->vexXopType == TYPE_VEX_3B)
{
switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1]))
{
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
mmmmmFromVEX2of3(insn->vexXopPrefix[1]));
return -1;
case VEX_LOB_0F:
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F38:
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
}
else if (insn->vexXopType == TYPE_VEX_2B)
{
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
else if (insn->vexXopType == TYPE_XOP)
{
switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1]))
{
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
mmmmmFromXOP2of3(insn->vexXopPrefix[1]));
return -1;
case XOP_MAP_SELECT_8:
insn->opcodeType = XOP8_MAP;
return consumeByte(insn, &insn->opcode);
case XOP_MAP_SELECT_9:
insn->opcodeType = XOP9_MAP;
return consumeByte(insn, &insn->opcode);
case XOP_MAP_SELECT_A:
insn->opcodeType = XOPA_MAP;
return consumeByte(insn, &insn->opcode);
}
}
if (consumeByte(insn, &current))
return -1;
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
return -1;
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
return -1;
insn->opcodeType = THREEBYTE_38;
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
return -1;
insn->opcodeType = THREEBYTE_3A;
} else if (current == 0xa6) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
return -1;
insn->opcodeType = THREEBYTE_A6;
} else if (current == 0xa7) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
return -1;
insn->opcodeType = THREEBYTE_A7;
} else {
dbgprintf(insn, "Didn't find a three-byte escape prefix");
insn->opcodeType = TWOBYTE;
}
}
/*
* At this point we have consumed the full opcode.
* Anything we consume from here on must be unconsumed.
*/
insn->opcode = current;
return 0;
}
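/*
 * Escape-byte summary of the non-VEX/XOP path above (illustrative):
 *   0f xx       -> TWOBYTE
 *   0f 38 xx    -> THREEBYTE_38
 *   0f 3a xx    -> THREEBYTE_3A
 *   0f a6/a7 xx -> THREEBYTE_A6 / THREEBYTE_A7
 * anything else -> ONEBYTE, with insn->opcode set to the byte itself.
 */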
static int readModRM(struct InternalInstruction* insn);
/*
* getIDWithAttrMask - Determines the ID of an instruction, consuming
* the ModR/M byte as appropriate for extended and escape opcodes,
* and using a supplied attribute mask.
*
* @param instructionID - A pointer whose target is filled in with the ID of the
* instruction.
* @param insn - The instruction whose ID is to be determined.
* @param attrMask - The attribute mask to search.
* @return - 0 if the ModR/M could be read when needed or was not
* needed; nonzero otherwise.
*/
static int getIDWithAttrMask(uint16_t* instructionID,
struct InternalInstruction* insn,
uint8_t attrMask) {
BOOL hasModRMExtension;
uint8_t instructionClass;
instructionClass = contextForAttrs(attrMask);
hasModRMExtension = modRMRequired(insn->opcodeType,
instructionClass,
insn->opcode);
if (hasModRMExtension) {
if (readModRM(insn))
return -1;
*instructionID = decode(insn->opcodeType,
instructionClass,
insn->opcode,
insn->modRM);
} else {
*instructionID = decode(insn->opcodeType,
instructionClass,
insn->opcode,
0);
}
return 0;
}
/*
* is16BitEquivalent - Determines whether two instruction names refer to
* equivalent instructions but one is 16-bit whereas the other is not.
*
* @param orig - The instruction that is not 16-bit
* @param equiv - The instruction that is 16-bit
*/
static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
for (i = 0;; i++) {
if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
if (orig[i] == '\0' || equiv[i] == '\0')
return FALSE;
if (orig[i] != equiv[i]) {
if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
continue;
if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
continue;
if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
continue;
return FALSE;
}
}
}
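/*
 * For example (illustrative), is16BitEquivalent("MOV32rr", "MOV16rr")
 * returns TRUE: '3' pairs with '1' and '2' pairs with '6' under the rules
 * above, and every other character matches exactly.
 */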
/*
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
* appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
*
* @param insn - The instruction whose ID is to be determined.
* @return - 0 if the ModR/M could be read when needed or was not needed;
* nonzero otherwise.
*/
static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
dbgprintf(insn, "getID()");
attrMask = ATTR_NONE;
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
if (insn->vexXopType != TYPE_NO_VEX_XOP) {
attrMask |= ATTR_VEX;
if (insn->vexXopType == TYPE_VEX_3B) {
switch (ppFromVEX3of3(insn->vexXopPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
break;
case VEX_PREFIX_F2:
attrMask |= ATTR_XD;
break;
}
if (lFromVEX3of3(insn->vexXopPrefix[2]))
attrMask |= ATTR_VEXL;
}
else if (insn->vexXopType == TYPE_VEX_2B) {
switch (ppFromVEX2of2(insn->vexXopPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
break;
case VEX_PREFIX_F2:
attrMask |= ATTR_XD;
break;
}
if (lFromVEX2of2(insn->vexXopPrefix[1]))
attrMask |= ATTR_VEXL;
}
else if (insn->vexXopType == TYPE_XOP) {
switch (ppFromXOP3of3(insn->vexXopPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
break;
case VEX_PREFIX_F2:
attrMask |= ATTR_XD;
break;
}
if (lFromXOP3of3(insn->vexXopPrefix[2]))
attrMask |= ATTR_VEXL;
}
else {
return -1;
}
}
else {
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
attrMask |= ATTR_ADSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
attrMask |= ATTR_XD;
}
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
/* The following clauses compensate for limitations of the tables. */
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
/*
* The instruction tables make no distinction between instructions that
* allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
* particular spot (i.e., many MMX operations). In general we're
* conservative, but in the specific case where OpSize is present but not
* in the right place we check if there's a 16-bit operation.
*/
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
const char *specName, *specWithOpSizeName;
spec = specifierForUID(instructionID);
if (getIDWithAttrMask(&instructionIDWithOpsize,
insn,
attrMask | ATTR_OPSIZE)) {
/*
* ModRM required with OpSize but not present; give up and return version
* without OpSize set
*/
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
specName = x86DisassemblerGetInstrName(instructionID, miiArg);
specWithOpSizeName =
x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
if (is16BitEquivalent(specName, specWithOpSizeName)) {
insn->instructionID = instructionIDWithOpsize;
insn->spec = specifierForUID(instructionIDWithOpsize);
} else {
insn->instructionID = instructionID;
insn->spec = spec;
}
return 0;
}
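/*
 * Illustrative effect of the compensation above: when an operand-size
 * prefix is present but not in the slot the tables key on, the decoder
 * retries with ATTR_OPSIZE; if the two candidates differ only as 16- vs.
 * 32/64-bit forms of the same operation (names like "ADD16rr" and
 * "ADD32rr"), the 16-bit form wins.
 */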
if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
insn->rexPrefix & 0x01) {
/*
* NOOP shouldn't decode as NOOP if REX.b is set. Instead
* it should decode as XCHG %r8, %eax.
*/
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithNewOpcode;
const struct InstructionSpecifier *specWithNewOpcode;
spec = specifierForUID(instructionID);
/* Borrow opcode from one of the other XCHGar opcodes */
insn->opcode = 0x91;
if (getIDWithAttrMask(&instructionIDWithNewOpcode,
insn,
attrMask)) {
insn->opcode = 0x90;
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
/* Change back */
insn->opcode = 0x90;
insn->instructionID = instructionIDWithNewOpcode;
insn->spec = specWithNewOpcode;
return 0;
}
insn->instructionID = instructionID;
insn->spec = specifierForUID(insn->instructionID);
return 0;
}
/*
* readSIB - Consumes the SIB byte to determine addressing information for an
* instruction.
*
* @param insn - The instruction whose SIB byte is to be read.
* @return - 0 if the SIB byte was successfully read; nonzero otherwise.
*/
static int readSIB(struct InternalInstruction* insn) {
SIBIndex sibIndexBase = 0;
SIBBase sibBaseBase = 0;
uint8_t index, base;
dbgprintf(insn, "readSIB()");
if (insn->consumedSIB)
return 0;
insn->consumedSIB = TRUE;
switch (insn->addressSize) {
case 2:
dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
return -1;
break;
case 4:
sibIndexBase = SIB_INDEX_EAX;
sibBaseBase = SIB_BASE_EAX;
break;
case 8:
sibIndexBase = SIB_INDEX_RAX;
sibBaseBase = SIB_BASE_RAX;
break;
}
if (consumeByte(insn, &insn->sib))
return -1;
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
switch (index) {
case 0x4:
insn->sibIndex = SIB_INDEX_NONE;
break;
default:
insn->sibIndex = (SIBIndex)(sibIndexBase + index);
if (insn->sibIndex == SIB_INDEX_sib ||
insn->sibIndex == SIB_INDEX_sib64)
insn->sibIndex = SIB_INDEX_NONE;
break;
}
switch (scaleFromSIB(insn->sib)) {
case 0:
insn->sibScale = 1;
break;
case 1:
insn->sibScale = 2;
break;
case 2:
insn->sibScale = 4;
break;
case 3:
insn->sibScale = 8;
break;
}
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
switch (base) {
case 0x5:
+ case 0xd:
switch (modFromModRM(insn->modRM)) {
case 0x0:
insn->eaDisplacement = EA_DISP_32;
insn->sibBase = SIB_BASE_NONE;
break;
case 0x1:
insn->eaDisplacement = EA_DISP_8;
- insn->sibBase = (insn->addressSize == 4 ?
- SIB_BASE_EBP : SIB_BASE_RBP);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
case 0x2:
insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = (insn->addressSize == 4 ?
- SIB_BASE_EBP : SIB_BASE_RBP);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
case 0x3:
debug("Cannot have Mod = 0b11 and a SIB byte");
return -1;
}
break;
default:
insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
return 0;
}
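/*
 * Worked example (illustrative): with addressSize == 4 and no REX bits,
 * SIB byte 0x8b splits as scale=10b (*4), index=001b (ECX), base=011b
 * (EBX), yielding sibScale == 4, sibIndex == SIB_INDEX_ECX and
 * sibBase == SIB_BASE_EBX, i.e. the effective address (%ebx,%ecx,4).
 */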
/*
* readDisplacement - Consumes the displacement of an instruction.
*
* @param insn - The instruction whose displacement is to be read.
* @return - 0 if the displacement byte was successfully read; nonzero
* otherwise.
*/
static int readDisplacement(struct InternalInstruction* insn) {
int8_t d8;
int16_t d16;
int32_t d32;
dbgprintf(insn, "readDisplacement()");
if (insn->consumedDisplacement)
return 0;
insn->consumedDisplacement = TRUE;
insn->displacementOffset = insn->readerCursor - insn->startLocation;
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
insn->consumedDisplacement = FALSE;
break;
case EA_DISP_8:
if (consumeInt8(insn, &d8))
return -1;
insn->displacement = d8;
break;
case EA_DISP_16:
if (consumeInt16(insn, &d16))
return -1;
insn->displacement = d16;
break;
case EA_DISP_32:
if (consumeInt32(insn, &d32))
return -1;
insn->displacement = d32;
break;
}
insn->consumedDisplacement = TRUE;
return 0;
}
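/*
 * The assignments above sign-extend implicitly: the signed 8/16/32-bit
 * temporaries widen into the signed displacement field. A hypothetical
 * standalone equivalent for the 8-bit case:
 */
static int64_t widenDisp8Example(uint8_t raw) {
  return (int64_t)(int8_t)raw;                 /* 0xf8 -> -8, not 248 */
}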
/*
* readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
* displacement) for an instruction and interprets it.
*
* @param insn - The instruction whose addressing information is to be read.
* @return - 0 if the information was successfully read; nonzero otherwise.
*/
static int readModRM(struct InternalInstruction* insn) {
uint8_t mod, rm, reg;
dbgprintf(insn, "readModRM()");
if (insn->consumedModRM)
return 0;
if (consumeByte(insn, &insn->modRM))
return -1;
insn->consumedModRM = TRUE;
mod = modFromModRM(insn->modRM);
rm = rmFromModRM(insn->modRM);
reg = regFromModRM(insn->modRM);
/*
* This goes by insn->registerSize to pick the correct register, which messes
* up if we're using (say) XMM or 8-bit register operands. That gets fixed in
* fixupReg().
*/
switch (insn->registerSize) {
case 2:
insn->regBase = MODRM_REG_AX;
insn->eaRegBase = EA_REG_AX;
break;
case 4:
insn->regBase = MODRM_REG_EAX;
insn->eaRegBase = EA_REG_EAX;
break;
case 8:
insn->regBase = MODRM_REG_RAX;
insn->eaRegBase = EA_REG_RAX;
break;
}
reg |= rFromREX(insn->rexPrefix) << 3;
rm |= bFromREX(insn->rexPrefix) << 3;
insn->reg = (Reg)(insn->regBase + reg);
switch (insn->addressSize) {
case 2:
insn->eaBaseBase = EA_BASE_BX_SI;
switch (mod) {
case 0x0:
if (rm == 0x6) {
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_16;
if (readDisplacement(insn))
return -1;
} else {
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_NONE;
}
break;
case 0x1:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_8;
if (readDisplacement(insn))
return -1;
break;
case 0x2:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_16;
if (readDisplacement(insn))
return -1;
break;
case 0x3:
insn->eaBase = (EABase)(insn->eaRegBase + rm);
if (readDisplacement(insn))
return -1;
break;
}
break;
case 4:
case 8:
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
switch (mod) {
case 0x0:
insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
switch (rm) {
case 0x4:
case 0xc: /* in case REX.b is set */
insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
readSIB(insn);
if (readDisplacement(insn))
return -1;
break;
case 0x5:
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_32;
if (readDisplacement(insn))
return -1;
break;
default:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
break;
}
break;
case 0x1:
case 0x2:
insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
switch (rm) {
case 0x4:
case 0xc: /* in case REX.b is set */
insn->eaBase = EA_BASE_sib;
readSIB(insn);
if (readDisplacement(insn))
return -1;
break;
default:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
if (readDisplacement(insn))
return -1;
break;
}
break;
case 0x3:
insn->eaDisplacement = EA_DISP_NONE;
insn->eaBase = (EABase)(insn->eaRegBase + rm);
break;
}
break;
} /* switch (insn->addressSize) */
return 0;
}
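/*
 * Illustrative sketch of the ModR/M field split used above (hypothetical
 * helper, not decoder code): mod/reg/rm occupy bits 7:6/5:3/2:0, and
 * REX.R / REX.B widen reg and rm to four bits.
 */
static void splitModRMExample(uint8_t modRM, uint8_t rexR, uint8_t rexB,
                              uint8_t *mod, uint8_t *reg, uint8_t *rm) {
  *mod = (modRM >> 6) & 0x3;
  *reg = ((modRM >> 3) & 0x7) | (rexR << 3);
  *rm  = (modRM & 0x7) | (rexB << 3);
}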
#define GENERIC_FIXUP_FUNC(name, base, prefix) \
static uint8_t name(struct InternalInstruction *insn, \
OperandType type, \
uint8_t index, \
uint8_t *valid) { \
*valid = 1; \
switch (type) { \
default: \
debug("Unhandled register type"); \
*valid = 0; \
return 0; \
case TYPE_Rv: \
return base + index; \
case TYPE_R8: \
if (insn->rexPrefix && \
index >= 4 && index <= 7) { \
return prefix##_SPL + (index - 4); \
} else { \
return prefix##_AL + index; \
} \
case TYPE_R16: \
return prefix##_AX + index; \
case TYPE_R32: \
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
case TYPE_XMM512: \
return prefix##_ZMM0 + index; \
case TYPE_XMM256: \
return prefix##_YMM0 + index; \
case TYPE_XMM128: \
case TYPE_XMM64: \
case TYPE_XMM32: \
case TYPE_XMM: \
return prefix##_XMM0 + index; \
case TYPE_MM64: \
case TYPE_MM32: \
case TYPE_MM: \
if (index > 7) \
*valid = 0; \
return prefix##_MM0 + index; \
case TYPE_SEGMENTREG: \
if (index > 5) \
*valid = 0; \
return prefix##_ES + index; \
case TYPE_DEBUGREG: \
if (index > 7) \
*valid = 0; \
return prefix##_DR0 + index; \
case TYPE_CONTROLREG: \
if (index > 8) \
*valid = 0; \
return prefix##_CR0 + index; \
} \
}
/*
* fixup*Value - Consults an operand type to determine the meaning of the
* reg or R/M field. If the operand is an XMM operand, for example, the
* field selects XMM0 rather than AX, which readModRM() would otherwise
* report.
*
* @param insn - The instruction containing the operand.
* @param type - The operand type.
* @param index - The existing value of the field as reported by readModRM().
* @param valid - The address of a uint8_t. The target is set to 1 if the
* field is valid for the register class; 0 if not.
* @return - The proper value.
*/
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
/*
* fixupReg - Consults an operand specifier to determine which of the
* fixup*Value functions to use in correcting readModRM()'s interpretation.
*
* @param insn - See fixup*Value().
* @param op - The operand specifier.
* @return - 0 if fixup was successful; -1 if the register returned was
* invalid for its class.
*/
static int fixupReg(struct InternalInstruction *insn,
const struct OperandSpecifier *op) {
uint8_t valid;
dbgprintf(insn, "fixupReg()");
switch ((OperandEncoding)op->encoding) {
default:
debug("Expected a REG or R/M encoding in fixupReg");
return -1;
case ENCODING_VVVV:
insn->vvvv = (Reg)fixupRegValue(insn,
(OperandType)op->type,
insn->vvvv,
&valid);
if (!valid)
return -1;
break;
case ENCODING_REG:
insn->reg = (Reg)fixupRegValue(insn,
(OperandType)op->type,
insn->reg - insn->regBase,
&valid);
if (!valid)
return -1;
break;
case ENCODING_RM:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(insn,
(OperandType)op->type,
insn->eaBase - insn->eaRegBase,
&valid);
if (!valid)
return -1;
}
break;
}
return 0;
}
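/*
 * Sketch of why the fixup is needed (values hypothetical, not decoder
 * code): readModRM() reports reg as a GPR, so for an XMM operand the
 * GPR-relative index must be remapped onto the XMM bank.
 */
static void fixupExample(struct InternalInstruction *insn) {
  uint8_t valid;
  uint8_t index = (uint8_t)(insn->reg - insn->regBase);       /* e.g. 5 */
  Reg xmm = (Reg)fixupRegValue(insn, TYPE_XMM, index, &valid);
  /* If valid is nonzero, xmm is MODRM_REG_XMM0 + 5, i.e. %xmm5. */
  (void)xmm;
}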
/*
* readOpcodeModifier - Reads an operand from the opcode field of an
* instruction. Handles AddRegFrm instructions.
*
* @param insn - The instruction whose opcode field is to be read.
* @return - 0 on success; nonzero otherwise.
*/
static int readOpcodeModifier(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcodeModifier()");
if (insn->consumedOpcodeModifier)
return 0;
insn->consumedOpcodeModifier = TRUE;
switch (insn->spec->modifierType) {
default:
debug("Unknown modifier type.");
return -1;
case MODIFIER_NONE:
debug("No modifier but an operand expects one.");
return -1;
case MODIFIER_OPCODE:
insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
return 0;
case MODIFIER_MODRM:
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
return 0;
}
}
/*
* readOpcodeRegister - Reads an operand from the opcode field of an
* instruction and interprets it appropriately given the operand width.
* Handles AddRegFrm instructions.
*
* @param insn - See readOpcodeModifier().
* @param size - The width (in bytes) of the register being specified.
* 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
* RAX.
* @return - 0 on success; nonzero otherwise.
*/
static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
dbgprintf(insn, "readOpcodeRegister()");
if (readOpcodeModifier(insn))
return -1;
if (size == 0)
size = insn->registerSize;
switch (size) {
case 1:
insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
if (insn->rexPrefix &&
insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ (insn->opcodeRegister - MODRM_REG_AL - 4));
}
break;
case 2:
insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 4:
insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 8:
insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
}
return 0;
}
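/*
 * Standalone sketch of the opcode-register computation above, assuming the
 * B8+rd MOV encoding (modifier base 0xb8; names hypothetical):
 */
static unsigned opcodeRegisterExample(uint8_t opcode, uint8_t rexB) {
  unsigned modifier = opcode - 0xb8;  /* MODIFIER_OPCODE with base 0xb8 */
  return (rexB << 3) | modifier;      /* 0x41 0xbb -> 11, i.e. %r11d */
}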
/*
* readImmediate - Consumes an immediate operand from an instruction, given the
* desired operand size.
*
* @param insn - The instruction whose operand is to be read.
* @param size - The width (in bytes) of the operand.
* @return - 0 if the immediate was successfully consumed; nonzero
* otherwise.
*/
static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
uint8_t imm8;
uint16_t imm16;
uint32_t imm32;
uint64_t imm64;
dbgprintf(insn, "readImmediate()");
if (insn->numImmediatesConsumed == 2) {
debug("Already consumed two immediates");
return -1;
}
if (size == 0)
size = insn->immediateSize;
else
insn->immediateSize = size;
insn->immediateOffset = insn->readerCursor - insn->startLocation;
switch (size) {
case 1:
if (consumeByte(insn, &imm8))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm8;
break;
case 2:
if (consumeUInt16(insn, &imm16))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm16;
break;
case 4:
if (consumeUInt32(insn, &imm32))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm32;
break;
case 8:
if (consumeUInt64(insn, &imm64))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm64;
break;
}
insn->numImmediatesConsumed++;
return 0;
}
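/*
 * The immediates slots above are uint64_t, so narrower immediates are
 * stored zero-extended; sign interpretation is left to the consumer. A
 * hypothetical standalone illustration:
 */
static uint64_t storeImm32Example(uint32_t imm32) {
  return (uint64_t)imm32;             /* 0xffffffff stays 4294967295 */
}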
/*
* readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
*
* @param insn - The instruction whose operand is to be read.
* @return - 0 if the vvvv was successfully consumed; nonzero
* otherwise.
*/
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
if (insn->vexXopType == TYPE_VEX_3B)
insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]);
else if (insn->vexXopType == TYPE_VEX_2B)
insn->vvvv = vvvvFromVEX2of2(insn->vexXopPrefix[1]);
else if (insn->vexXopType == TYPE_XOP)
insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]);
else
return -1;
if (insn->mode != MODE_64BIT)
insn->vvvv &= 0x7;
return 0;
}
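/*
 * VEX stores the vvvv register specifier one's-complemented in bits 6:3 of
 * the relevant prefix byte; a minimal sketch of that extraction (assumed to
 * match the vvvvFrom* accessors used above):
 */
static uint8_t vvvvExample(uint8_t payloadByte) {
  return (uint8_t)((~payloadByte >> 3) & 0xf);
}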
/*
* readOperands - Consults the specifier for an instruction and consumes all
* operands for that instruction, interpreting them as it goes.
*
* @param insn - The instruction whose operands are to be read and interpreted.
* @return - 0 if all operands could be read; nonzero otherwise.
*/
static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
int sawRegImm = 0;
dbgprintf(insn, "readOperands()");
/* If non-zero vvvv specified, need to make sure one of the operands
uses it. */
hasVVVV = !readVVVV(insn);
needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
break;
case ENCODING_REG:
case ENCODING_RM:
if (readModRM(insn))
return -1;
if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_CB:
case ENCODING_CW:
case ENCODING_CD:
case ENCODING_CP:
case ENCODING_CO:
case ENCODING_CT:
dbgprintf(insn, "We currently don't hande code-offset encodings");
return -1;
case ENCODING_IB:
if (sawRegImm) {
/* Saw a register immediate so don't read again and instead split the
previous immediate. FIXME: This is a hack. */
insn->immediates[insn->numImmediatesConsumed] =
insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
++insn->numImmediatesConsumed;
break;
}
if (readImmediate(insn, 1))
return -1;
if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 31)
return -1;
if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
sawRegImm = 1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
return -1;
break;
case ENCODING_ID:
if (readImmediate(insn, 4))
return -1;
break;
case ENCODING_IO:
if (readImmediate(insn, 8))
return -1;
break;
case ENCODING_Iv:
if (readImmediate(insn, insn->immediateSize))
return -1;
break;
case ENCODING_Ia:
if (readImmediate(insn, insn->addressSize))
return -1;
break;
case ENCODING_RB:
if (readOpcodeRegister(insn, 1))
return -1;
break;
case ENCODING_RW:
if (readOpcodeRegister(insn, 2))
return -1;
break;
case ENCODING_RD:
if (readOpcodeRegister(insn, 4))
return -1;
break;
case ENCODING_RO:
if (readOpcodeRegister(insn, 8))
return -1;
break;
case ENCODING_Rv:
if (readOpcodeRegister(insn, 0))
return -1;
break;
case ENCODING_I:
if (readOpcodeModifier(insn))
return -1;
break;
case ENCODING_VVVV:
needVVVV = 0; /* Mark that we have found a VVVV operand. */
if (!hasVVVV)
return -1;
if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_DUP:
break;
default:
dbgprintf(insn, "Encountered an operand with an unknown encoding.");
return -1;
}
}
/* If we didn't find an ENCODING_VVVV operand but a non-zero vvvv was present, fail. */
if (needVVVV) return -1;
return 0;
}
/*
* decodeInstruction - Reads and interprets a full instruction provided by the
* user.
*
* @param insn - A pointer to the instruction to be populated. Must be
* pre-allocated.
* @param reader - The function to be used to read the instruction's bytes.
* @param readerArg - A generic argument to be passed to the reader to store
* any internal state.
* @param logger - If non-NULL, the function to be used to write log messages
* and warnings.
* @param loggerArg - A generic argument to be passed to the logger to store
* any internal state.
* @param startLoc - The address (in the reader's address space) of the first
* byte in the instruction.
* @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
* decode the instruction in.
* @return - 0 if the instruction's memory could be read; nonzero if
* not.
*/
int decodeInstruction(struct InternalInstruction* insn,
byteReader_t reader,
const void* readerArg,
dlog_t logger,
void* loggerArg,
const void* miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
insn->reader = reader;
insn->readerArg = readerArg;
insn->dlog = logger;
insn->dlogArg = loggerArg;
insn->startLocation = startLoc;
insn->readerCursor = startLoc;
insn->mode = mode;
insn->numImmediatesConsumed = 0;
if (readPrefixes(insn) ||
readOpcode(insn) ||
getID(insn, miiArg) ||
insn->instructionID == 0 ||
readOperands(insn))
return -1;
insn->operands = &x86OperandSets[insn->spec->operands][0];
insn->length = insn->readerCursor - insn->startLocation;
dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
startLoc, insn->readerCursor, insn->length);
if (insn->length > 15)
dbgprintf(insn, "Instruction exceeds 15-byte limit");
return 0;
}
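/*
 * A hedged usage sketch: decoding one instruction from a flat byte buffer.
 * It assumes the byteReader_t callback takes (arg, out byte, address), as
 * the reader usage above suggests; Region and bufferReaderExample are
 * hypothetical names.
 */
struct Region { const uint8_t *bytes; uint64_t size; };

static int bufferReaderExample(const void *arg, uint8_t *byte,
                               uint64_t address) {
  const struct Region *r = (const struct Region *)arg;
  if (address >= r->size)
    return -1;                         /* read past the end of the buffer */
  *byte = r->bytes[address];
  return 0;
}

/*
 * struct InternalInstruction insn;
 * struct Region r = { code, codeLen };
 * if (!decodeInstruction(&insn, bufferReaderExample, &r, NULL, NULL,
 *                        miiArg, 0, MODE_64BIT))
 *   ...consume insn.length bytes...
 */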
Index: head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
===================================================================
--- head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp (revision 265925)
@@ -1,156 +1,167 @@
//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the X86MCAsmInfo properties.
//
//===----------------------------------------------------------------------===//
#include "X86MCAsmInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
enum AsmWriterFlavorTy {
// Note: This numbering has to match the GCC assembler dialects for inline
// asm alternatives to work right.
ATT = 0, Intel = 1
};
static cl::opt<AsmWriterFlavorTy>
AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
cl::desc("Choose style of code to emit from X86 backend:"),
cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
clEnumValEnd));
static cl::opt<bool>
MarkedJTDataRegions("mark-data-regions", cl::init(false),
cl::desc("Mark code section jump table data regions."),
cl::Hidden);
void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
PointerSize = CalleeSaveStackSlotSize = 8;
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
if (!is64Bit)
Data64bitsDirective = 0; // we can't emit a 64-bit unit
// Use ## as a comment string so that .s files generated by llvm can go
// through the GCC preprocessor without causing an error. This is needed
// because "clang foo.s" runs the C preprocessor, which is usually reserved
// for .S files on other systems. Perhaps this is because the file system
// wasn't always case preserving or something.
CommentString = "##";
SupportsDebugInformation = true;
UseDataRegionDirectives = MarkedJTDataRegions;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // FIXME: this should not depend on the target OS version, but on the ld64
+ // version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
+ // FDE relocs may be used.
+ DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
+
+ // old assembler lacks some directives
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than OS version
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
}
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
: X86MCAsmInfoDarwin(Triple) {
}
void X86ELFMCAsmInfo::anchor() { }
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
bool isX32 = T.getEnvironment() == Triple::GNUX32;
// For ELF, x86-64 pointer size depends on the ABI.
// For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
// with the x32 ABI, pointer size remains the default 4.
PointerSize = (is64Bit && !isX32) ? 8 : 4;
// OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
PrivateGlobalPrefix = ".L";
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
// Debug Information
SupportsDebugInformation = true;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
// OpenBSD and Bitrig have buggy support for .quad in 32-bit mode; just
// split it into two .words.
if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
T.getArch() == Triple::x86)
Data64bitsDirective = 0;
}
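// A hedged restatement of the sizing rule above (hypothetical helper, not
// LLVM API): only non-x32 x86-64 gets 8-byte pointers, while the stack
// slot size follows the architecture rather than the ABI.
static unsigned elfPointerSizeExample(bool is64Bit, bool isX32) {
  return (is64Bit && !isX32) ? 8 : 4;
}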
const MCExpr *
X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const {
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
const MCExpr *Four = MCConstantExpr::Create(4, Context);
return MCBinaryExpr::CreateAdd(Res, Four, Context);
}
const MCSection *X86ELFMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const {
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
0, SectionKind::getMetadata());
}
void X86MCAsmInfoMicrosoft::anchor() { }
X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
}
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
AllowAtInName = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
}
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
}
Index: head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp (revision 265925)
@@ -1,737 +1,739 @@
//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to X86 machine code.
//
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Primitive Helper Functions.
//===----------------------------------------------------------------------===//
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
bool Intrn = MF.getFunction()->hasInternalLinkage();
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL);
OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer.EndCOFFSymbolDef();
}
// Have common code print out the function header with linkage info etc.
EmitFunctionHeader();
// Emit the rest of the function body.
EmitFunctionBody();
// We didn't modify anything.
return false;
}
/// printSymbolOperand - Print a raw symbol reference operand. This handles
/// jump tables, constant pools, global address and external symbols, all of
/// which print to a label with various suffixes for relocation types etc.
void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
raw_ostream &O) {
switch (MO.getType()) {
default: llvm_unreachable("unknown symbol type!");
case MachineOperand::MO_JumpTableIndex:
O << *GetJTISymbol(MO.getIndex());
break;
case MachineOperand::MO_ConstantPoolIndex:
O << *GetCPISymbol(MO.getIndex());
printOffset(MO.getOffset(), O);
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *GVSym;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
else
GVSym = getSymbol(GV);
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (GVSym->getName()[0] != '$')
O << *GVSym;
else
O << '(' << *GVSym << ')';
printOffset(MO.getOffset(), O);
break;
}
case MachineOperand::MO_ExternalSymbol: {
const MCSymbol *SymToPrint;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
SmallString<128> TempNameStr;
TempNameStr += StringRef(MO.getSymbolName());
TempNameStr += StringRef("$stub");
MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym.getPointer() == 0) {
TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
StubSym = MachineModuleInfoImpl::
StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
true);
}
SymToPrint = StubSym.getPointer();
} else {
SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (SymToPrint->getName()[0] != '$')
O << *SymToPrint;
else
O << '(' << *SymToPrint << ')';
break;
}
}
switch (MO.getTargetFlags()) {
default:
llvm_unreachable("Unknown target flag on GV operand");
case X86II::MO_NO_FLAG: // No flag.
break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
O << " + [.-" << *MF->getPICBaseSymbol() << ']';
break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
O << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
case X86II::MO_DTPOFF: O << "@DTPOFF"; break;
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
case X86II::MO_GOT: O << "@GOT"; break;
case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
case X86II::MO_PLT: O << "@PLT"; break;
case X86II::MO_TLVP: O << "@TLVP"; break;
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_SECREL: O << "@SECREL32"; break;
}
}
/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value. These print slightly differently, for
/// example, a $ is not emitted.
void X86AsmPrinter::printPCRelImm(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("Unknown pcrel immediate operand");
case MachineOperand::MO_Register:
// pc-relativeness was handled when computing the value in the reg.
printOperand(MI, OpNo, O);
return;
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
return;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
printSymbolOperand(MO, O);
return;
}
}
void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier,
unsigned AsmVariant) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("unknown operand type!");
case MachineOperand::MO_Register: {
// FIXME: Enumerating AsmVariant, so we can remove magic number.
if (AsmVariant == 0) O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ?
MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
}
O << X86ATTInstPrinter::getRegisterName(Reg);
return;
}
case MachineOperand::MO_Immediate:
if (AsmVariant == 0) O << '$';
O << MO.getImm();
return;
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol: {
if (AsmVariant == 0) O << '$';
printSymbolOperand(MO, O);
break;
}
}
}
void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
raw_ostream &O, const char *Modifier) {
const MachineOperand &BaseReg = MI->getOperand(Op);
const MachineOperand &IndexReg = MI->getOperand(Op+2);
const MachineOperand &DispSpec = MI->getOperand(Op+3);
// If we really don't want to print out (rip), don't.
bool HasBaseReg = BaseReg.getReg() != 0;
if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
BaseReg.getReg() == X86::RIP)
HasBaseReg = false;
// HasParenPart - True if we will print out the () part of the mem ref.
bool HasParenPart = IndexReg.getReg() || HasBaseReg;
if (DispSpec.isImm()) {
int DispVal = DispSpec.getImm();
if (DispVal || !HasParenPart)
O << DispVal;
} else {
assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
DispSpec.isJTI() || DispSpec.isSymbol());
printSymbolOperand(MI->getOperand(Op+3), O);
}
if (Modifier && strcmp(Modifier, "H") == 0)
O << "+8";
if (HasParenPart) {
assert(IndexReg.getReg() != X86::ESP &&
"X86 doesn't allow scaling by ESP");
O << '(';
if (HasBaseReg)
printOperand(MI, Op, O, Modifier);
if (IndexReg.getReg()) {
O << ',';
printOperand(MI, Op+2, O, Modifier);
unsigned ScaleVal = MI->getOperand(Op+1).getImm();
if (ScaleVal != 1)
O << ',' << ScaleVal;
}
O << ')';
}
}
void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
raw_ostream &O, const char *Modifier) {
assert(isMem(MI, Op) && "Invalid memory reference!");
const MachineOperand &Segment = MI->getOperand(Op+4);
if (Segment.getReg()) {
printOperand(MI, Op+4, O, Modifier);
O << ':';
}
printLeaMemReference(MI, Op, O, Modifier);
}
void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
raw_ostream &O, const char *Modifier,
unsigned AsmVariant){
const MachineOperand &BaseReg = MI->getOperand(Op);
unsigned ScaleVal = MI->getOperand(Op+1).getImm();
const MachineOperand &IndexReg = MI->getOperand(Op+2);
const MachineOperand &DispSpec = MI->getOperand(Op+3);
const MachineOperand &SegReg = MI->getOperand(Op+4);
// If this has a segment register, print it.
if (SegReg.getReg()) {
printOperand(MI, Op+4, O, Modifier, AsmVariant);
O << ':';
}
O << '[';
bool NeedPlus = false;
if (BaseReg.getReg()) {
printOperand(MI, Op, O, Modifier, AsmVariant);
NeedPlus = true;
}
if (IndexReg.getReg()) {
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
O << ScaleVal << '*';
printOperand(MI, Op+2, O, Modifier, AsmVariant);
NeedPlus = true;
}
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
printOperand(MI, Op+3, O, Modifier, AsmVariant);
} else {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
if (NeedPlus) {
if (DispVal > 0)
O << " + ";
else {
O << " - ";
DispVal = -DispVal;
}
}
O << DispVal;
}
}
O << ']';
}
bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
raw_ostream &O) {
unsigned Reg = MO.getReg();
switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
Reg = getX86SubSuperRegister(Reg, MVT::i8);
break;
case 'h': // Print QImode high register
Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
break;
case 'w': // Print HImode register
Reg = getX86SubSuperRegister(Reg, MVT::i16);
break;
case 'k': // Print SImode register
Reg = getX86SubSuperRegister(Reg, MVT::i32);
break;
- case 'q': // Print DImode register
- // FIXME: gcc will actually print e instead of r for 32-bit.
- Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ case 'q':
+ // Print 64-bit register names if 64-bit integer registers are available.
+ // Otherwise, print 32-bit register names.
+ MVT::SimpleValueType Ty = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ Reg = getX86SubSuperRegister(Reg, Ty);
break;
}
O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
return false;
}
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNo);
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
if (MO.isImm()) {
O << MO.getImm();
return false;
}
if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
printSymbolOperand(MO, O);
if (Subtarget->isPICStyleRIPRel())
O << "(%rip)";
return false;
}
if (MO.isReg()) {
O << '(';
printOperand(MI, OpNo, O);
O << ')';
return false;
}
return true;
case 'c': // Don't print "$" before a global var name or constant.
if (MO.isImm())
O << MO.getImm();
else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
printSymbolOperand(MO, O);
else
printOperand(MI, OpNo, O);
return false;
case 'A': // Print '*' before a register (it must be a register)
if (MO.isReg()) {
O << '*';
printOperand(MI, OpNo, O);
return false;
}
return true;
case 'b': // Print QImode register
case 'h': // Print QImode high register
case 'w': // Print HImode register
case 'k': // Print SImode register
case 'q': // Print DImode register
if (MO.isReg())
return printAsmMRegister(MO, ExtraCode[0], O);
printOperand(MI, OpNo, O);
return false;
case 'P': // This is the operand of a call, treat specially.
printPCRelImm(MI, OpNo, O);
return false;
case 'n': // Negate the immediate or print a '-' before the operand.
// Note: this is a temporary solution. It should be handled target
// independently as part of the 'MC' work.
if (MO.isImm()) {
O << -MO.getImm();
return false;
}
O << '-';
}
}
printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant);
return false;
}
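// For illustration, how the register modifiers handled above surface in GNU
// inline asm (a hedged sketch; with the 'q' change above, %q0 prints the
// 64-bit register name only when 64-bit registers are available):
static inline long modifierExample(long v) {
  asm("lea 1(%q0), %q0" : "+r"(v));  // e.g. "lea 1(%rax), %rax" on x86-64
  return v;
}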
bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (AsmVariant) {
printIntelMemReference(MI, OpNo, O);
return false;
}
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
case 'b': // Print QImode register
case 'h': // Print QImode high register
case 'w': // Print HImode register
case 'k': // Print SImode register
case 'q': // Print DImode register
// These only apply to registers, ignore on mem.
break;
case 'H':
printMemReference(MI, OpNo, O, "H");
return false;
case 'P': // Don't print @PLT, but do print as memory.
printMemReference(MI, OpNo, O, "no-rip");
return false;
}
}
printMemReference(MI, OpNo, O);
return false;
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget->isTargetEnvMacho())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
if (Subtarget->isTargetCOFF()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
if (!Subtarget->is64Bit()) {
MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00"));
OutStreamer.BeginCOFFSymbolDef(S);
OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
OutStreamer.EndCOFFSymbolDef();
// According to the PE-COFF spec, the LSB of this value marks the object
// for "registered SEH". This means that all SEH handler entry points
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
S->setAbsolute();
OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
OutStreamer.EmitAssignment(
S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
}
}
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
if (Subtarget->isTargetEnvMacho()) {
// All darwin targets use mach-o.
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
// Output stubs for dynamically-linked functions.
MachineModuleInfoMachO::SymbolListTy Stubs;
Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty()) {
const MCSection *TheSection =
OutContext.getMachOSection("__IMPORT", "__jump_table",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
5, SectionKind::getMetadata());
OutStreamer.SwitchSection(TheSection);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer.EmitLabel(Stubs[i].first);
// .indirect_symbol _foo
OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
MCSA_IndirectSymbol);
// hlt; hlt; hlt; hlt; hlt (hlt = 0xf4).
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
OutStreamer.EmitBytes(StringRef(HltInsts, 5));
}
Stubs.clear();
OutStreamer.AddBlankLine();
}
// Output stubs for external and common global variables.
Stubs = MMIMacho.GetGVStubList();
if (!Stubs.empty()) {
const MCSection *TheSection =
OutContext.getMachOSection("__IMPORT", "__pointers",
MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
SectionKind::getMetadata());
OutStreamer.SwitchSection(TheSection);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$non_lazy_ptr:
OutStreamer.EmitLabel(Stubs[i].first);
// .indirect_symbol _foo
MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
MCSA_IndirectSymbol);
// .long 0
if (MCSym.getInt())
// External to current translation unit.
OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
// When we place the LSDA into the TEXT section, the type info
// pointers need to be indirect and pc-rel. We accomplish this by
// using NLPs. However, sometimes the types are local to the file. So
// we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
EmitAlignment(2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$non_lazy_ptr:
OutStreamer.EmitLabel(Stubs[i].first);
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
}
SM.serializeToStackMapSection();
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
MMI->usesVAFloatArgument()) {
StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
}
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
X86COFFMachineModuleInfo &COFFMMI =
MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
// Emit type information for external functions
typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
for (externals_iterator I = COFFMMI.externals_begin(),
E = COFFMMI.externals_end();
I != E; ++I) {
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer.EndCOFFSymbolDef();
}
// Necessary for dllexport support
std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
const TargetLoweringObjectFileCOFF &TLOFCOFF =
static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasDLLExportLinkage())
DLLExportedFns.push_back(getSymbol(I));
for (Module::const_global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I)
if (I->hasDLLExportLinkage())
DLLExportedGlobals.push_back(getSymbol(I));
// Output linker support code for dllexported globals on windows.
if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
SmallString<128> name;
for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
if (Subtarget->isTargetWindows())
name = " /EXPORT:";
else
name = " -export:";
name += DLLExportedGlobals[i]->getName();
if (Subtarget->isTargetWindows())
name += ",DATA";
else
name += ",data";
OutStreamer.EmitBytes(name);
}
for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
if (Subtarget->isTargetWindows())
name = " /EXPORT:";
else
name = " -export:";
name += DLLExportedFns[i]->getName();
OutStreamer.EmitBytes(name);
}
}
}
if (Subtarget->isTargetELF()) {
const TargetLoweringObjectFileELF &TLOFELF =
static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
// Output stubs for external and common global variables.
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
const DataLayout *TD = TM.getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
TD->getPointerSize());
}
Stubs.clear();
}
}
}
//===----------------------------------------------------------------------===//
// Target Registry Stuff
//===----------------------------------------------------------------------===//
// Force static initialization.
extern "C" void LLVMInitializeX86AsmPrinter() {
RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
}
Index: head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 265924)
+++ head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 265925)
@@ -1,19932 +1,19956 @@
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-isel"
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <bitset>
#include <cctype>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, SDLoc dl,
unsigned vectorWidth) {
assert((vectorWidth == 128 || vectorWidth == 256) &&
"Unsupported vector width");
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/vectorWidth;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements()/Factor);
// Extract from UNDEF is UNDEF.
if (Vec.getOpcode() == ISD::UNDEF)
return DAG.getUNDEF(ResultVT);
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
// This is the index of the first element of the vectorWidth-bit chunk
// we want.
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
* ElemsPerChunk);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
return Result;
}
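// A standalone check of the index normalization above (hypothetical
// helper): for 32-bit elements and 128-bit chunks, ElemsPerChunk is 4, so
// element index 5 normalizes to 4 (the start of its chunk).
static unsigned normalizeIdxExample(unsigned IdxVal, unsigned ElemBits,
                                    unsigned ChunkBits) {
  unsigned ElemsPerChunk = ChunkBits / ElemBits;
  return ((IdxVal * ElemBits) / ChunkBits) * ElemsPerChunk;
}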
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, SDLoc dl) {
assert((Vec.getValueType().is256BitVector() ||
Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
}
/// Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, SDLoc dl) {
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
}
static SDValue InsertSubVector(SDValue Result, SDValue Vec,
unsigned IdxVal, SelectionDAG &DAG,
SDLoc dl, unsigned vectorWidth) {
assert((vectorWidth == 128 || vectorWidth == 256) &&
"Unsupported vector width");
// Inserting UNDEF is Result
if (Vec.getOpcode() == ISD::UNDEF)
return Result;
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
// This is the index of the first element of the vectorWidth-bit chunk
// we want.
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
* ElemsPerChunk);
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
VecIdx);
}
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
unsigned IdxVal, SelectionDAG &DAG,
SDLoc dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
unsigned IdxVal, SelectionDAG &DAG,
SDLoc dl) {
assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
/// large BUILD_VECTORS.
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
SDLoc dl) {
SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
}
static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
SDLoc dl) {
SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
bool is64Bit = Subtarget->is64Bit();
if (Subtarget->isTargetEnvMacho()) {
if (is64Bit)
return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
}
if (Subtarget->isTargetLinux())
return new X86LinuxTargetObjectFile();
if (Subtarget->isTargetELF())
return new TargetLoweringObjectFileELF();
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
TD = getDataLayout();
resetOperationActions();
}
void X86TargetLowering::resetOperationActions() {
const TargetMachine &TM = getTargetMachine();
static bool FirstTimeThrough = true;
// If none of the target options have changed, then we don't need to reset the
// operation actions.
if (!FirstTimeThrough && TO == TM.Options) return;
if (!FirstTimeThrough) {
// Reinitialize the actions.
initActions();
}
FirstTimeThrough = false;
TO = TM.Options;
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird; it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
if (Subtarget->isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides on Atom when compiling with O2
if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
addBypassSlowDiv(32, 8);
if (Subtarget->is64Bit())
addBypassSlowDiv(64, 16);
}
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
// The _ftol2 runtime function has an unusual calling conv, which
// is modeled by a special pseudo-instruction.
setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
}
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
} else if (Subtarget->isTargetMingw()) {
// MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
} else {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
}
// Set up the register classes.
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
}
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}
if (isTargetFTOL()) {
// Use the _ftol2 runtime function, which has a pseudo-instruction
// to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
// TODO: when we have SSE, these could be more efficient by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
}
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
// the two-result form to trivial CSE, which is able to combine x/y and x%y
// into a single instruction.
//
// Scalar integer multiply-high is also lowered to use two-result
// operations, to match the available instructions. However, plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
// Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
setOperationAction(ISD::ADDC, VT, Custom);
setOperationAction(ISD::ADDE, VT, Custom);
setOperationAction(ISD::SUBC, VT, Custom);
setOperationAction(ISD::SUBE, VT, Custom);
}
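// Worked example (an assumption, not in the original): because SDIV and SREM
// both expand to the two-result SDIVREM node, C code such as
//   q = a / b;  r = a % b;
// is CSE'd into one idiv, which produces the quotient in EAX and the
// remainder in EDX simultaneously.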
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::f32, Expand);
setOperationAction(ISD::BR_CC , MVT::f64, Expand);
setOperationAction(ISD::BR_CC , MVT::f80, Expand);
setOperationAction(ISD::BR_CC , MVT::i8, Expand);
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
setOperationAction(ISD::BR_CC , MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them up to i32, which has a shorter
// encoding.
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
if (Subtarget->hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
}
}
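// Hedged sketch of the custom lowering (illustrative only): without LZCNT,
// i32 ctlz is typically emitted as a bit-scan plus an XOR that converts the
// bit index into a leading-zero count, e.g.
//   bsr eax, edi
//   xor eax, 31
// with additional code to handle a zero input in the non-ZERO_UNDEF form.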
if (Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
// handling, but rather a light-weight setjmp/longjmp replacement used for
// continuations, user-level threading, and the like. As a result, no other
// SjLj exception interfaces are implemented, so please don't build your own
// exception handling on top of them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
if (Subtarget->is64Bit())
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
}
// 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (!Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
if (Subtarget->is64Bit()) {
setExceptionPointerRegister(X86::RAX);
setExceptionSelectorRegister(X86::RDX);
} else {
setExceptionPointerRegister(X86::EAX);
setExceptionSelectorRegister(X86::EDX);
}
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
// TargetInfo::X86_64ABIBuiltinVaList
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
// TargetInfo::CharPtrBuiltinVaList
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
}
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isOSWindows() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
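// Note (an assumption, not in the original): the Custom lowering on Windows
// targets routes large dynamic allocations through the stack-probe helper
// (e.g. _chkstk) so each new page is touched as the stack grows; the
// segmented-stacks path similarly requires target-specific code.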
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::FR64RegClass);
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS , MVT::f64, Custom);
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f64, Custom);
setOperationAction(ISD::FNEG , MVT::f32, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// Lower this to FGETSIGNx86 plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
} else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
setOperationAction(ISD::UNDEF, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
addLegalFPImmediate(APFloat(+0.0f)); // FLD0
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
setOperationAction(ISD::ADD , VT, Expand);
setOperationAction(ISD::SUB , VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
setOperationAction(ISD::MUL , VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::LOAD, VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::SHL, VT, Expand);
setOperationAction(ISD::SRA, VT, Expand);
setOperationAction(ISD::SRL, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction(VT,
(MVT::SimpleValueType)InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx are supported; everything uses intrinsics.
}
// MMX-sized vectors (other than x86mmx) are expected to be expanded
// into smaller operations.
setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
setOperationAction(ISD::AND, MVT::v8i8, Expand);
setOperationAction(ISD::AND, MVT::v4i16, Expand);
setOperationAction(ISD::AND, MVT::v2i32, Expand);
setOperationAction(ISD::AND, MVT::v1i64, Expand);
setOperationAction(ISD::OR, MVT::v8i8, Expand);
setOperationAction(ISD::OR, MVT::v4i16, Expand);
setOperationAction(ISD::OR, MVT::v2i32, Expand);
setOperationAction(ISD::OR, MVT::v1i64, Expand);
setOperationAction(ISD::XOR, MVT::v8i8, Expand);
setOperationAction(ISD::XOR, MVT::v4i16, Expand);
setOperationAction(ISD::XOR, MVT::v2i32, Expand);
setOperationAction(ISD::XOR, MVT::v1i64, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
setOperationAction(ISD::ADD, MVT::v16i8, Legal);
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
setOperationAction(ISD::SUB, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FADD, MVT::v2f64, Legal);
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v2i64);
setOperationAction(ISD::OR, VT, Promote);
AddPromotedToType (ISD::OR, VT, MVT::v2i64);
setOperationAction(ISD::XOR, VT, Promote);
AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
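// Illustrative consequence (not stated in the original source): promoting
// these ops to v2i64 means a bitwise AND of two v16i8 values is selected as
// one full-width PAND, conceptually
//   bitcast to v2i64 -> PAND -> bitcast back to v16i8
// which is sound because bitwise ops are element-width agnostic.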
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
// As there is no 64-bit GPR available, we need to build a special custom
// sequence to convert from v2i32 to v2f32.
if (!Subtarget->is64Bit())
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
// i8 and i16 vectors are custom, because the source register and source
// memory operand types are not the same width. f32 vectors are custom
// since the immediate controlling the insert encodes additional
// information.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
// FIXME: these should be Legal, but that's only for the case where
// the index is constant. For now custom expand to deal with that.
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
}
if (Subtarget->hasSSE2()) {
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
setOperationAction(ISD::SHL, MVT::v8i16, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
// In the customized shift lowering, the legal cases in AVX2 will be
// recognized.
setOperationAction(ISD::SRL, MVT::v2i64, Custom);
setOperationAction(ISD::SRL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
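// Note (illustrative assumption): pre-AVX2 x86 has no per-element variable
// shift instructions for these types, so the Custom lowering synthesizes
// them, e.g. by splitting into shifts by constant amounts that are then
// blended together; AVX2's variable-shift cases are recognized as legal
// within this same custom lowering.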
if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
setOperationAction(ISD::FADD, MVT::v8f32, Legal);
setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
setOperationAction(ISD::SHL, MVT::v16i16, Custom);
setOperationAction(ISD::SHL, MVT::v32i8, Custom);
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
}
if (Subtarget->hasInt256()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
setOperationAction(ISD::ADD, MVT::v32i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i64, Legal);
setOperationAction(ISD::SUB, MVT::v8i32, Legal);
setOperationAction(ISD::SUB, MVT::v16i16, Legal);
setOperationAction(ISD::SUB, MVT::v32i8, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
// Don't lower v32i8 because there is no 128-bit byte mul
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
setOperationAction(ISD::ADD, MVT::v16i16, Custom);
setOperationAction(ISD::ADD, MVT::v32i8, Custom);
setOperationAction(ISD::SUB, MVT::v4i64, Custom);
setOperationAction(ISD::SUB, MVT::v8i32, Custom);
setOperationAction(ISD::SUB, MVT::v16i16, Custom);
setOperationAction(ISD::SUB, MVT::v32i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
}
// In the customized shift lowering, the legal cases in AVX2 will be
// recognized.
setOperationAction(ISD::SRL, MVT::v4i64, Custom);
setOperationAction(ISD::SRL, MVT::v8i32, Custom);
setOperationAction(ISD::SHL, MVT::v4i64, Custom);
setOperationAction(ISD::SHL, MVT::v8i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
if (VT.is128BitVector())
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-256-bit vectors
if (!VT.is256BitVector())
continue;
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v4i64);
setOperationAction(ISD::OR, VT, Promote);
AddPromotedToType (ISD::OR, VT, MVT::v4i64);
setOperationAction(ISD::XOR, VT, Promote);
AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
}
}
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
setOperationAction(ISD::FADD, MVT::v16f32, Legal);
setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::TRUNCATE, MVT::i1, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
setOperationAction(ISD::ADD, MVT::v8i64, Legal);
setOperationAction(ISD::ADD, MVT::v16i32, Legal);
setOperationAction(ISD::SUB, MVT::v8i64, Legal);
setOperationAction(ISD::SUB, MVT::v16i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
setOperationAction(ISD::SRL, MVT::v8i64, Custom);
setOperationAction(ISD::SRL, MVT::v16i32, Custom);
setOperationAction(ISD::SHL, MVT::v8i64, Custom);
setOperationAction(ISD::SHL, MVT::v16i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i64, Custom);
setOperationAction(ISD::SRA, MVT::v16i32, Custom);
setOperationAction(ISD::AND, MVT::v8i64, Legal);
setOperationAction(ISD::OR, MVT::v8i64, Legal);
setOperationAction(ISD::XOR, MVT::v8i64, Legal);
setOperationAction(ISD::AND, MVT::v16i32, Legal);
setOperationAction(ISD::OR, MVT::v16i32, Legal);
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
// Extract subvector is special because the value type
// (result) is 256/128-bit but the source is 512-bit wide.
if (VT.is128BitVector() || VT.is256BitVector())
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.getVectorElementType() == MVT::i1)
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
// Do not attempt to custom lower other non-512-bit vectors
if (!VT.is512BitVector())
continue;
if (EltSize >= 32) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
}
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-512-bit vectors
if (!VT.is512BitVector())
continue;
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
}// has AVX-512
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
// of this type with custom code.
for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
// Add/Sub/Mul with overflow operations are custom lowered.
MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
}
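// Sketch (illustrative, assuming the usual x86 lowering): an intrinsic like
//   {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// becomes an ADD that defines EFLAGS followed by a SETO that materializes
// the overflow bit, instead of going through generic legalization.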
// There are no 8-bit 3-address imul/mul instructions
setOperationAction(ISD::SMULO, MVT::i8, Expand);
setOperationAction(ISD::UMULO, MVT::i8, Expand);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
}
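// Note (an assumption): clearing these names keeps the legalizer from
// emitting calls to __ashlti3/__lshrti3/__ashrti3, which the 32-bit runtime
// does not provide, forcing 128-bit shifts to be expanded inline.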
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget->isTargetDarwin()) {
// For MacOSX, we don't want the normal expansion of a libcall to sincos;
// we want to issue a libcall to __sincos_stret to avoid memory traffic.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
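// Illustrative example (not from the original): with this combine enabled,
//   %s = call float @sinf(float %x)
//   %c = call float @cosf(float %x)
// can be merged into a single sincosf call; on Darwin the Custom FSINCOS
// lowering instead emits __sincos_stret, which returns both results in
// registers and avoids the memory round-trip.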
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
computeRegisterProperties();
// On Darwin, -Os means optimize for size without hurting performance, so
// do not reduce the limit.
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
// Predictable cmovs don't hurt on Atom because it's in-order.
PredictableSelectIsExpensive = !Subtarget->isAtom();
setPrefFunctionAlignment(4); // 2^4 bytes.
}
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i8;
const TargetMachine &TM = getTargetMachine();
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
switch(VT.getVectorNumElements()) {
case 8: return MVT::v8i1;
case 16: return MVT::v16i1;
}
return VT.changeVectorElementTypeToInteger();
}
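// Example of the mapping above (illustrative): comparing two v4f32 vectors
// yields a v4i32 mask via changeVectorElementTypeToInteger(), while with
// AVX-512 an 8- or 16-element compare yields a v8i1/v16i1 mask register.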
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
}
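// Worked example (an assumption, not in the original): for
//   struct S { int a; __m128 v; };
// the recursion visits the 128-bit vector member and raises MaxAlign to 16,
// so on a 32-bit SSE target a byval S is placed on a 16-byte boundary.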
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = TD->getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
}
unsigned Align = 4;
if (Subtarget->hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against the alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
if ((!IsMemset || ZeroMemset) &&
!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) {
if (Subtarget->hasInt256())
return MVT::v8i32;
if (Subtarget->hasFp256())
return MVT::v8f32;
}
if (Subtarget->hasSSE2())
return MVT::v4i32;
if (Subtarget->hasSSE1())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
}
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
}
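// Illustrative outcome (not from the original source): a 16-byte memcpy
// with suitably aligned operands on an SSE2 target is lowered as a single
// v4i32 load/store pair (movdqa/movdqu), while an 8-byte copy on 32-bit
// SSE2 uses an f64 (movsd) pair unless the source is a constant string.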
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
return X86ScalarSSEf32;
else if (VT == MVT::f64)
return X86ScalarSSEf64;
return true;
}
bool
X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
if (Fast)
*Fast = Subtarget->isUnalignedMemAccessFast();
return true;
}
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid, MCContext &Ctx) const {
assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::Create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
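// Example entry (illustrative): in GOT PIC mode each jump-table slot is
// emitted as something like
//   .long .LBB0_3@GOTOFF
// so the table holds offsets from the GOT base rather than absolute
// addresses, matching the EK_Custom32 encoding selected above.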
/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget->is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
return Table;
}
/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
/// MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
if (Subtarget->isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
// FIXME: Why is this routine here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = Subtarget->is64Bit() ?
(const TargetRegisterClass*)&X86::GR64RegClass :
(const TargetRegisterClass*)&X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
case MVT::v4f64:
RRC = &X86::VR128RegClass;
break;
}
return std::make_pair(RRC, Cost);
}
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
return false;
if (Subtarget->is64Bit()) {
// %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
Offset = 0x28;
if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
AddressSpace = 256;
else
AddressSpace = 257;
} else {
// %gs:0x14 on i386
Offset = 0x14;
AddressSpace = 256;
}
return true;
}
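// Sketch (an assumption): on x86-64 Linux the stack protector therefore
// loads the canary from the TLS block, e.g.
//   mov rax, qword ptr fs:[0x28]
// where address space 257 models the FS segment and 256 models GS.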
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
assert(SrcAS != DestAS && "Expected different address spaces!");
return SrcAS < 256 && DestAS < 256;
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "X86GenCallingConv.inc"
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
static const uint16_t ScratchRegs[] = { X86::R11, 0 };
return ScratchRegs;
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
MVT::i16));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
// Promote values to the appropriate types
if (VA.getLocInfo() == CCValAssign::SExt)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt)
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::ST0 ||
VA.getLocReg() == X86::ST1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetOps.push_back(ValToCopy);
// Don't emit a copytoreg.
continue;
}
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
}
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// The x86-64 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// Win32 requires us to put the sret argument to %eax as well.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
(Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
unsigned RetValReg
= (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(X86ISD::RET_FLAG, dl,
MVT::Other, &RetOps[0], RetOps.size());
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
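// Example (illustrative): for
//   float callee();
//   long double caller() { return callee(); }
// the call result reaches the return only through an FP_EXTEND node, which
// the logic above tolerates as long as every user of that extend is an
// X86ISD::RET_FLAG node.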
MVT
X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
ReturnMVT = MVT::i8;
else
ReturnMVT = MVT::i32;
MVT MinVT = getRegisterType(ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
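// Example (illustrative): a "zeroext i1" return on x86-64 is widened only to
// MVT::i8 (the caller reads %al), while other small integers are extended to
// at least MVT::i32, matching the i32 minimum chosen above.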
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
SDValue
X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
SDValue Val;
// If this is a call to a function that returns an fp value on the floating
// point stack, we must guarantee the value is popped from the stack, so
// a CopyFromReg is not good enough - the copy instruction may be eliminated
// if the return value is not used. We use the FpPOP_RETVAL instruction
// instead.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
SDValue Ops[] = { Chain, InFlag };
Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
MVT::Other, MVT::Glue, Ops), 1);
Val = Chain.getValue(0);
// Round the f80 to the right size, which also moves it to the appropriate
// xmm register.
if (CopyVT != VA.getValVT())
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
Val = Chain.getValue(0);
}
InFlag = Chain.getValue(2);
InVals.push_back(Val);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// The StdCall calling convention is standard for many Windows API routines.
// It differs from the C calling convention only slightly: the callee cleans
// up the stack instead of the caller, and symbols are decorated in some
// fancy way :) It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
enum StructReturnType {
NotStructReturn,
RegStructReturn,
StackStructReturn
};
static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg())
return RegStructReturn;
return StackStructReturn;
}
/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg())
return RegStructReturn;
return StackStructReturn;
}
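// The classification above corresponds to IR such as (hypothetical names):
//   define void @f(%struct.S* sret %out)        ; StackStructReturn
//   define void @g(%struct.S* inreg sret %out)  ; RegStructReturn
//   define i32 @h(i32 %x)                       ; NotStructReturn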
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
MachinePointerInfo(), MachinePointerInfo());
}
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE);
}
/// \brief Return true if the calling convention is a C calling convention.
static bool IsCCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
CC == CallingConv::X86_64_SysV);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
CallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
return false;
return true;
}
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain,
CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
getTargetMachine().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
// If value is passed by pointer we have address passed instead of the value
// itself.
if (VA.getLocInfo() == CCValAssign::Indirect)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization, mark all arguments mutable, since they
// could be overwritten by the lowering of the arguments in case of a tail
// call.
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
}
}
SDValue
X86TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWindows = Subtarget->isTargetWindows();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
(void)LastVal;
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
else if (RegVT == MVT::f32)
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
RC = &X86::VR256RegClass;
else if (RegVT.is128BitVector())
RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
else
llvm_unreachable("Unknown argument type!");
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector())
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
}
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
MachinePointerInfo(), false, false, false, 0);
InVals.push_back(ArgValue);
}
// The x86-64 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// Win32 requires us to put the sret argument to %eax as well.
// Save the argument into a virtual register so that we can access it
// from the return points.
if (MF.getFunction()->hasStructRetAttr() &&
(Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
MVT PtrTy = getPointerTy();
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
if (FuncIsMadeTailCallSafe(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
static const uint16_t GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
static const uint16_t GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
static const uint16_t XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
const uint16_t *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
// The XMM registers which might contain var arg parameters are shadowed
// by their paired GPRs. So we only need to save the GPRs to their home
// slots.
TotalNumIntRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
bool NoImplicitFloatOps = Fn->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
!Subtarget->hasSSE1())
// SSE is disabled (e.g. in kernel code or with soft-float), so don't save
// the XMM registers on the stack.
TotalNumXMMRegs = 0;
if (IsWin64) {
const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fixup to set vararg frame on shadow area (4 x i64).
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so
// they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(
MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
false));
}
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
&X86::GR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
FuncInfo->getRegSaveFrameIndex(), Offset),
false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
// Now store the XMM (fp + vector) parameter registers.
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getRegSaveFrameIndex()));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getVarArgsFPOffset()));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
&X86::VR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other,
&SaveXMMOps[0], SaveXMMOps.size()));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
}
}
// Some CCs need callee pop.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
argsAreStructReturn(Ins) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
if (CallConv == CallingConv::X86_FastCall ||
CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
FuncInfo->setArgumentStackSize(StackSize);
return Chain;
}
SDValue
X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr, SDValue Chain,
bool IsTailCall, bool Is64Bit,
int FPDiff, SDLoc dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
false, false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
unsigned SlotSize, int FPDiff, SDLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
return Chain;
}
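// Worked example with made-up numbers: if the caller pops 8 bytes of its own
// stack arguments but the tail callee needs 24 bytes, FPDiff = 8 - 24 = -16;
// with an 8-byte slot the return address is re-stored to a fixed object at
// offset FPDiff - SlotSize = -24, i.e. moved down to make room for the
// larger outgoing argument area.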
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
bool &isTailCall = CLI.IsTailCall;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
bool IsWindows = Subtarget->isTargetWindows();
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibcall)
// This is a sibcall. The memory operands are already available in the
// caller's incoming argument space, which its own caller set up.
NumBytes = 0;
else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Record the delta by which the return-address stack slot moves, but only
// if this delta means a larger movement than any previous one (i.e. FPDiff
// is more negative).
if (FPDiff < X86Info->getTCReturnAddrDelta())
X86Info->setTCReturnAddrDelta(FPDiff);
}
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
Arg = SpillSlot;
break;
}
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
unsigned ShadowReg = 0;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
}
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
if (Subtarget->isPICStyleGOT()) {
// ELF PIC requires the GOT address to be in the EBX register before
// function calls made via the PLT.
if (!isTailCall) {
RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasHiddenVisibility() &&
!G->getGlobal()->hasProtectedVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
if (Is64Bit && isVarArg && !IsWin64) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used, and must be in the range 0 - 8 inclusive.
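// For example (sketch): a call like printf("%f %f\n", a, b) that passes two
// values in XMM registers may be lowered roughly as
//   movb $2, %al
//   callq printf
// where any %al value that is >= the actual XMM count (and <= 8) is also
// conforming.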
// Count the number of XMM registers allocated.
static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
DAG.getConstant(NumXMMRegs, MVT::i8)));
}
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl,
RegInfo->getStackRegister(),
getPointerTy());
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
getPointerTy(), RegInfo->getSlotSize(),
FPDiff, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
bool ExtraLoad = false;
unsigned WrapperKind = ISD::DELETED_NODE;
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols must go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
// we don't need to use the PLT - we can directly call it.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
cast<Function>(GV)->getAttributes().
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
OpFlags = X86II::MO_GOTPCREL;
WrapperKind = X86ISD::WrapperRIP;
ExtraLoad = true;
}
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
// Add a wrapper if needed.
if (WrapperKind != ISD::DELETED_NODE)
Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
// Add extra indirection if needed.
if (ExtraLoad)
Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(),
false, false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to
// external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
Ops.push_back(Chain);
Ops.push_back(Callee);
if (isTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
if (isTailCall) {
// We used to do:
//// If this is the first return lowered for this function, add the regs
//// to the liveout set for the function.
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
// For MSVC Win32 targets, the caller pops the hidden struct pointer.
NumBytesForCalleeToPush = 4;
else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPush,
true),
InFlag, dl);
InFlag = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
Ins, dl, DAG, InVals);
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like StdCall, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are
// free for argument passing (inreg). Tail call optimization is performed
// provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On X86_64 architecture with GOT-style position independent code only local
// (within module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's
// dyld, for example.)
// If a tail-called callee has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after
// the original RETADDR, but before the saved frame pointer or the spilled
// registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
/// GetAlignedArgumentStackSize - Round the stack size up so that it is, e.g.,
/// of the form 16n + 12 for a 16-byte alignment requirement with a 4-byte
/// return-address slot.
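/// A worked example with assumed values StackAlignment = 16, SlotSize = 4:
/// for StackSize = 20, 20 & 15 = 4 <= 12, so Offset = 20 + (12 - 4) = 28,
/// i.e. 16*1 + 12; for StackSize = 30, 30 & 15 = 14 > 12, so
/// Offset = (30 & ~15) + 16 + 12 = 44, i.e. 16*2 + 12. Either way the result
/// leaves exactly SlotSize bytes below the next 16-byte boundary for the
/// return address.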
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
unsigned SlotSize = RegInfo->getSlotSize();
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Offset is already within (StackAlignment - SlotSize) of an aligned
// boundary, so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
} else {
// Mask out the lower bits and add the stack alignment once, plus the
// (StackAlignment - SlotSize) bytes.
Offset = ((~AlignMask) & Offset) + StackAlignment +
(StackAlignment-SlotSize);
}
return Offset;
}
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(Def, FI))
return false;
} else {
unsigned Opcode = Def->getOpcode();
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
} else
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
FI = FINode->getIndex();
Bytes = Flags.getByValSize();
} else
return false;
assert(FI != INT_MAX);
if (!MFI->isFixedObjectIndex(FI))
return false;
return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
// then the FP_EXTEND of the call result is not a nop. It's not safe to
// perform a tailcall optimization here.
if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
return false;
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
}
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
if (RegInfo->needsStackRealignment(MF))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
if (isVarArg && !Outs.empty()) {
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
if (IsCalleeWin64 || IsCallerWin64)
return false;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
}
// If the call result is in ST0 / ST1, it needs to be popped off the x87
// stack. Therefore, if it's not used by the call it is not safe to optimize
// this into a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
Unused = true;
break;
}
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
return false;
}
}
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
return false;
for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
return false;
if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
return false;
if (RVLocs1[i].isRegLoc()) {
if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
return false;
} else {
if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
return false;
}
}
}
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsCalleeWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
((const X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs =
(getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
unsigned Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
if (++NumInRegs == MaxInRegs)
return false;
break;
}
}
}
}
return true;
}
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return X86::createFastISel(funcInfo, libInfo);
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
static bool MayFoldLoad(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
}
static bool MayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
case X86ISD::MOVLPS:
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
case X86ISD::VPERMI:
return true;
}
}
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue V1, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
}
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue V1, unsigned TargetMask,
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::VPERMILP:
case X86ISD::VPERMI:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
}
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue V1, SDValue V2, unsigned TargetMask,
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGNR:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
}
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue V1, SDValue V2, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
case X86ISD::MOVLPS:
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
-(int64_t)SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
// restrictions.
if (!hasSymbolicDisplacement)
return true;
// FIXME: Some tweaks might be needed for medium code model.
if (M != CodeModel::Small && M != CodeModel::Kernel)
return false;
// For the small code model, we assume that the last object is 16MB below
// the end of the 31-bit address boundary. We may also accept fairly large
// negative constants, knowing that all objects are in the positive half of
// the address space.
if (M == CodeModel::Small && Offset < 16*1024*1024)
return true;
// For the kernel code model, we know that all objects reside in the
// negative half of the 32-bit address space. We may not accept negative
// offsets, since they may be just off, but we may accept fairly large
// positive ones.
if (M == CodeModel::Kernel && Offset > 0)
return true;
return false;
}
/// isCalleePop - Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt) {
if (IsVarArg)
return false;
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
return !is64Bit;
case CallingConv::Fast:
case CallingConv::GHC:
case CallingConv::HiPE:
return TailCallOpt;
}
}
/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
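// For example (illustrative): an integer (setlt %x, 1) is rewritten below to
// compare X <= 0 and yields X86::COND_LE, while the FP condcodes SETOEQ and
// SETUNE map to COND_INVALID because they would require testing two flags
// (ZF and PF) at once.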
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_LE;
}
}
switch (SetCCOpcode) {
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;
case ISD::SETNE: return X86::COND_NE;
case ISD::SETULT: return X86::COND_B;
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
}
}
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
if (ISD::isNON_EXTLoad(LHS.getNode()) &&
!ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
std::swap(LHS, RHS);
break;
}
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
case ISD::SETOGT:
case ISD::SETGT: return X86::COND_A;
case ISD::SETOLE: // flipped
case ISD::SETOGE:
case ISD::SETGE: return X86::COND_AE;
case ISD::SETUGT: // flipped
case ISD::SETULT:
case ISD::SETLT: return X86::COND_B;
case ISD::SETUGE: // flipped
case ISD::SETULE:
case ISD::SETLE: return X86::COND_BE;
case ISD::SETONE:
case ISD::SETNE: return X86::COND_NE;
case ISD::SETUO: return X86::COND_P;
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
}
}
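// Illustrative examples (values chosen for exposition): for integers,
// (X setgt -1) is rewritten as a compare against 0 and yields
// X86::COND_NS ("sign bit clear"). For floats, (X setolt Y) swaps the
// operands and yields X86::COND_A; per the flags table above, an
// unordered result (ZF=PF=CF=1) does not satisfy COND_A, matching the
// ordered semantics of setolt.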
/// hasFPCMov - Is there a floating-point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
default:
return false;
case X86::COND_B:
case X86::COND_BE:
case X86::COND_E:
case X86::COND_P:
case X86::COND_A:
case X86::COND_AE:
case X86::COND_NE:
case X86::COND_NP:
return true;
}
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
return true;
}
return false;
}
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return (Val < 0 || Val == CmpVal);
}
/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
/// at position Pos and ending at Pos+Size-1, is either undef or equals the
/// corresponding value of the sequence starting at Low.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
unsigned Pos, unsigned Size, int Low) {
for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
if (!isUndefOrEqual(Mask[i], Low))
return false;
return true;
}
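// Illustrative example: for Mask = <4, -1, 6, 7>, the call
// isSequentialOrUndefInRange(Mask, 0, 4, 4) returns true, since element 1
// is undef and the remaining elements match the sequence 4, 5, 6, 7.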
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return (Mask[0] < 2 && Mask[1] < 2);
return false;
}
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Lower quadword copied in order or undef.
if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
return false;
// Upper quadword shuffled.
for (unsigned i = 4; i != 8; ++i)
if (!isUndefOrInRange(Mask[i], 4, 8))
return false;
if (VT == MVT::v16i16) {
// Lower quadword copied in order or undef.
if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
return false;
// Upper quadword shuffled.
for (unsigned i = 12; i != 16; ++i)
if (!isUndefOrInRange(Mask[i], 12, 16))
return false;
}
return true;
}
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Upper quadword copied in order.
if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
return false;
// Lower quadword shuffled.
for (unsigned i = 0; i != 4; ++i)
if (!isUndefOrInRange(Mask[i], 0, 4))
return false;
if (VT == MVT::v16i16) {
// Upper quadword copied in order.
if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
return false;
// Lower quadword shuffled.
for (unsigned i = 8; i != 12; ++i)
if (!isUndefOrInRange(Mask[i], 8, 12))
return false;
}
return true;
}
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
(VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
if (NumLaneElts == 2)
return false;
for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
unsigned i;
for (i = 0; i != NumLaneElts; ++i) {
if (Mask[i+l] >= 0)
break;
}
// Lane is all undef, go to next lane
if (i == NumLaneElts)
continue;
int Start = Mask[i+l];
// Make sure it's in this lane in one of the sources
if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
!isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
return false;
// If not lane 0, then we must match lane 0
if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
return false;
// Correct second source to be contiguous with first source
if (Start >= (int)NumElts)
Start -= NumElts - NumLaneElts;
// Make sure we're shifting in the right direction.
if (Start <= (int)(i+l))
return false;
Start -= i;
// Check the rest of the elements to see if they are consecutive.
for (++i; i != NumLaneElts; ++i) {
int Idx = Mask[i+l];
// Make sure it's in this lane
if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
!isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
return false;
// If not lane 0, then we must match lane 0
if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
return false;
if (Idx >= (int)NumElts)
Idx -= NumElts - NumLaneElts;
if (!isUndefOrEqual(Idx, Start+i))
return false;
}
}
return true;
}
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
if (idx < 0)
continue;
else if (idx < (int)NumElems)
Mask[i] = idx + NumElems;
else
Mask[i] = idx - NumElems;
}
}
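// Illustrative example: with NumElems = 4, the mask <0, 5, -1, 2> becomes
// <4, 1, -1, 6>: indices below NumElems move to the second source, indices
// at or above it move to the first, and undef (negative) entries are kept.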
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, it checks whether the sources are
/// the reverse of what x86 shuffles want.
static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElems = NumElems/NumLanes;
if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
bool symetricMaskRequired =
(VT.getSizeInBits() >= 256) && (EltSize == 32);
// VSHUFPSY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
// SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
// SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
//
// DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
// Y3..Y0, Y3..Y0, X3..X0, X3..X0
//
// VSHUFPDY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
// SRC1 => X3 X2 X1 X0
// SRC2 => Y3 Y2 Y1 Y0
//
// DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
SmallVector<int, 4> MaskVal(NumLaneElems, -1);
unsigned HalfLaneElems = NumLaneElems/2;
for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
for (unsigned i = 0; i != NumLaneElems; ++i) {
int Idx = Mask[i+l];
unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
return false;
// For VSHUFPSY, the mask of the second half must be the same as the
// first but with the appropriate offsets. This works in the same way as
// VPERMILPS works with masks.
if (!symetricMaskRequired || Idx < 0)
continue;
if (MaskVal[i] < 0) {
MaskVal[i] = Idx - l;
continue;
}
if ((signed)(Idx - l) != MaskVal[i])
return false;
}
}
return true;
}
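// Illustrative example: for v4f32 with Commuted = false, the mask
// <3, 2, 6, 7> is accepted: the low two elements come from the first
// source (range [0,4)) and the high two from the second (range [4,8)).
// With Commuted = true the expected sources are reversed.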
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 4)
return false;
// Expect Mask[0] == 6, Mask[1] == 7, Mask[2] == 2, Mask[3] == 3
return isUndefOrEqual(Mask[0], 6) &&
isUndefOrEqual(Mask[1], 7) &&
isUndefOrEqual(Mask[2], 2) &&
isUndefOrEqual(Mask[3], 3);
}
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 4)
return false;
return isUndefOrEqual(Mask[0], 2) &&
isUndefOrEqual(Mask[1], 3) &&
isUndefOrEqual(Mask[2], 2) &&
isUndefOrEqual(Mask[3], 3);
}
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i + NumElems))
return false;
for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
return true;
}
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i + e], i + NumElems))
return false;
return true;
}
//
// Some special combinations that can be optimized.
//
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
MVT VT = SVOp->getSimpleValueType(0);
SDLoc dl(SVOp);
if (VT != MVT::v8i32 && VT != MVT::v8f32)
return SDValue();
ArrayRef<int> Mask = SVOp->getMask();
// These are the special masks that may be optimized.
static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
bool MatchEvenMask = true;
bool MatchOddMask = true;
for (int i=0; i<8; ++i) {
if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
MatchEvenMask = false;
if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
MatchOddMask = false;
}
if (!MatchEvenMask && !MatchOddMask)
return SDValue();
SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
SDValue Op0 = SVOp->getOperand(0);
SDValue Op1 = SVOp->getOperand(1);
if (MatchEvenMask) {
// Shift the second operand right to 32 bits.
static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
} else {
// Shift the first operand left to 32 bits.
static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
}
static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
}
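// Illustrative example: for the even mask <0,8,2,10,4,12,6,14>, Op1 is
// first shuffled by <-1,0,-1,2,-1,4,-1,6>, moving its element 0 into
// position 1; the final blend <0,9,2,11,4,13,6,15> then selects that
// element, which is exactly source element 8 of the original shuffle.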
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckl");
// AVX defines UNPCK* to operate independently on 128-bit lanes.
unsigned NumLanes;
unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
if (VT.is256BitVector()) {
if (NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
NumLanes = 2;
NumOf256BitLanes = 1;
} else if (VT.is512BitVector()) {
assert(VT.getScalarType().getSizeInBits() >= 32 &&
"Unsupported vector type for unpckh");
NumLanes = 2;
NumOf256BitLanes = 2;
} else {
NumLanes = 1;
NumOf256BitLanes = 1;
}
unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
unsigned NumLaneElts = NumEltsInStride/NumLanes;
for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
int BitI = Mask[l256*NumEltsInStride+l+i];
int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
return false;
if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
}
return true;
}
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
// AVX defines UNPCK* to operate independently on 128-bit lanes.
unsigned NumLanes;
unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
if (VT.is256BitVector()) {
if (NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
NumLanes = 2;
NumOf256BitLanes = 1;
} else if (VT.is512BitVector()) {
assert(VT.getScalarType().getSizeInBits() >= 32 &&
"Unsupported vector type for unpckh");
NumLanes = 2;
NumOf256BitLanes = 2;
} else {
NumLanes = 1;
NumOf256BitLanes = 1;
}
unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
unsigned NumLaneElts = NumEltsInStride/NumLanes;
for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
int BitI = Mask[l256*NumEltsInStride+l+i];
int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
return false;
if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
}
return true;
}
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
bool Is256BitVec = VT.is256BitVector();
if (VT.is512BitVector())
return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
// FIXME: Need a better way to get rid of this, there's no latency difference
// between UNPCKLPD and MOVDDUP, the latter should always be checked first and
// the former later. We should also remove the "_undef" special mask.
if (NumElts == 4 && Is256BitVec)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
// independently on 128-bit lanes.
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
int BitI = Mask[l+i];
int BitI1 = Mask[l+i+1];
if (!isUndefOrEqual(BitI, j))
return false;
if (!isUndefOrEqual(BitI1, j))
return false;
}
}
return true;
}
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
if (VT.is512BitVector())
return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
// independently on 128-bit lanes.
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
int BitI = Mask[l+i];
int BitI1 = Mask[l+i+1];
if (!isUndefOrEqual(BitI, j))
return false;
if (!isUndefOrEqual(BitI1, j))
return false;
}
}
return true;
}
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
if (!VT.is128BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
for (unsigned i = 1; i != NumElts; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
return true;
}
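// Illustrative example: for v4i32 the mask <4, 1, 2, 3> matches MOVL:
// element 0 is taken from the first element of V2 and the remaining
// elements come from V1 in order.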
/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example, in the
/// shuffle below:
///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// the first half comes from the second half of V1 and the second half from
/// the second half of V2.
static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
// sources have 4 halves, namely: C, D, E, F. The final values of A and
// B must come from C, D, E or F.
unsigned HalfSize = VT.getVectorNumElements()/2;
bool MatchA = false, MatchB = false;
// Check if A comes from one of C, D, E, F.
for (unsigned Half = 0; Half != 4; ++Half) {
if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
MatchA = true;
break;
}
}
// Check if B comes from one of C, D, E, F.
for (unsigned Half = 0; Half != 4; ++Half) {
if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
MatchB = true;
break;
}
}
return MatchA && MatchB;
}
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
MVT VT = SVOp->getSimpleValueType(0);
unsigned HalfSize = VT.getVectorNumElements()/2;
unsigned FstHalf = 0, SndHalf = 0;
for (unsigned i = 0; i < HalfSize; ++i) {
if (SVOp->getMaskElt(i) > 0) {
FstHalf = SVOp->getMaskElt(i)/HalfSize;
break;
}
}
for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
if (SVOp->getMaskElt(i) > 0) {
SndHalf = SVOp->getMaskElt(i)/HalfSize;
break;
}
}
return (FstHalf | (SndHalf << 4));
}
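// Illustrative example: for a v8f32 shuffle with mask
// <4,5,6,7,12,13,14,15>, the first destination half selects source half 1
// (the upper half of V1) and the second selects source half 3 (the upper
// half of V2), giving the immediate (1 | (3 << 4)) == 0x31.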
// Symmetric in-lane mask. Each lane has 4 elements (for imm8).
static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize < 32)
return false;
unsigned NumElts = VT.getVectorNumElements();
Imm8 = 0;
if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
Imm8 |= Mask[i] << (i*2);
}
return true;
}
unsigned LaneSize = 4;
SmallVector<int, 4> MaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
if (Mask[i+l] < 0)
continue;
if (MaskVal[i] < 0) {
MaskVal[i] = Mask[i+l] - l;
Imm8 |= MaskVal[i] << (i*2);
continue;
}
if (Mask[i+l] != (signed)(MaskVal[i]+l))
return false;
}
}
return true;
}
/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching differs depending on whether the underlying
/// type is 32- or 64-bit. For VPERMILPS the high half of the mask should point
/// to the same elements as the low half, but in the higher half of the source.
/// For VPERMILPD the two lanes can be shuffled independently of each other,
/// with the restriction that lanes can't be crossed. Also handles PSHUFDY.
static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (VT.getSizeInBits() < 256 || EltSize < 32)
return false;
bool symetricMaskRequired = (EltSize == 32);
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned LaneSize = NumElts/NumLanes;
// 2 or 4 elements in one lane
SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
if (symetricMaskRequired) {
if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
ExpectedMaskVal[i] = Mask[i+l] - l;
continue;
}
if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
return false;
}
}
}
return true;
}
/// isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of
/// what x86 movss wants: the lowest element must be the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
if (!VT.is128BitVector())
return false;
unsigned NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
if (!isUndefOrEqual(Mask[0], 0))
return false;
for (unsigned i = 1; i != NumOps; ++i)
if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
(V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
(V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
return false;
return true;
}
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if (!Subtarget->hasSSE3())
return false;
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
(VT.is256BitVector() && NumElems != 8) ||
(VT.is512BitVector() && NumElems != 16))
return false;
// "i+1" is the value the indexed mask element must have
for (unsigned i = 0; i != NumElems; i += 2)
if (!isUndefOrEqual(Mask[i], i+1) ||
!isUndefOrEqual(Mask[i+1], i+1))
return false;
return true;
}
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if (!Subtarget->hasSSE3())
return false;
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
(VT.is256BitVector() && NumElems != 8) ||
(VT.is512BitVector() && NumElems != 16))
return false;
// "i" is the value the indexed mask element must have
for (unsigned i = 0; i != NumElems; i += 2)
if (!isUndefOrEqual(Mask[i], i) ||
!isUndefOrEqual(Mask[i+1], i))
return false;
return true;
}
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
if (NumElts != 4)
return false;
for (unsigned i = 0; i != NumElts/2; ++i)
if (!isUndefOrEqual(Mask[i], 0))
return false;
for (unsigned i = NumElts/2; i != NumElts; ++i)
if (!isUndefOrEqual(Mask[i], NumElts/2))
return false;
return true;
}
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
unsigned e = VT.getVectorNumElements() / 2;
for (unsigned i = 0; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
for (unsigned i = 0; i != e; ++i)
if (!isUndefOrEqual(Mask[e+i], i))
return false;
return true;
}
/// isVEXTRACTIndex - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for instruction that extract 128 or 256 bit vectors
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
return false;
// The index should be aligned on a vecWidth-bit boundary.
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
return Result;
}
/// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for input to
/// insertion of 128 or 256-bit subvectors
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
return false;
// The index should be aligned on a vecWidth-bit boundary.
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
return Result;
}
bool X86::isVINSERT128Index(SDNode *N) {
return isVINSERTIndex(N, 128);
}
bool X86::isVINSERT256Index(SDNode *N) {
return isVINSERTIndex(N, 256);
}
bool X86::isVEXTRACT128Index(SDNode *N) {
return isVEXTRACTIndex(N, 128);
}
bool X86::isVEXTRACT256Index(SDNode *N) {
return isVEXTRACTIndex(N, 256);
}
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
MVT VT = N->getSimpleValueType(0);
assert((VT.getSizeInBits() >= 128) &&
"Unsupported vector type for PSHUF/SHUFP");
// Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
// independently on 128-bit lanes.
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
"Only supports 2, 4 or 8 elements per lane");
unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
unsigned Mask = 0;
for (unsigned i = 0; i != NumElts; ++i) {
int Elt = N->getMaskElt(i);
if (Elt < 0) continue;
Elt &= NumLaneElts - 1;
unsigned ShAmt = (i << Shift) % 8;
Mask |= Elt << ShAmt;
}
return Mask;
}
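// Illustrative example: for the v4f32 reversal mask <3, 2, 1, 0>, the
// per-element 2-bit fields pack as 3 | (2 << 2) | (1 << 4) | (0 << 6)
// == 0x1B, the familiar "reverse" SHUFPS/PSHUFD immediate.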
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
unsigned NumElts = VT.getVectorNumElements();
unsigned Mask = 0;
for (unsigned l = 0; l != NumElts; l += 8) {
// 8 nodes per lane, but we only care about the last 4.
for (unsigned i = 0; i < 4; ++i) {
int Elt = N->getMaskElt(l+i+4);
if (Elt < 0) continue;
Elt &= 0x3; // only 2-bits.
Mask |= Elt << (i * 2);
}
}
return Mask;
}
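// Illustrative example: for the v8i16 mask <0,1,2,3,7,6,5,4>, only
// elements 4..7 matter; masked to 2 bits they are 3, 2, 1, 0, packing to
// 3 | (2 << 2) | (1 << 4) | (0 << 6) == 0x1B, which reverses the upper
// quadword.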
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
unsigned NumElts = VT.getVectorNumElements();
unsigned Mask = 0;
for (unsigned l = 0; l != NumElts; l += 8) {
// 8 nodes per lane, but we only care about the first 4.
for (unsigned i = 0; i < 4; ++i) {
int Elt = N->getMaskElt(l+i);
if (Elt < 0) continue;
Elt &= 0x3; // only 2-bits
Mask |= Elt << (i * 2);
}
}
return Mask;
}
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
MVT VT = SVOp->getSimpleValueType(0);
unsigned EltSize = VT.is512BitVector() ? 1 :
VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
int Val = 0;
unsigned i;
for (i = 0; i != NumElts; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
}
if (Val >= (int)NumElts)
Val -= NumElts - NumLaneElts;
assert(Val - i > 0 && "PALIGNR imm should be positive");
return (Val - i) * EltSize;
}
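// Illustrative example: for a v8i16 shuffle with mask <1,2,3,4,5,6,7,8>,
// the first defined element is 1 at position 0, so the returned byte
// rotation amount is (1 - 0) * 2 == 2.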
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
llvm_unreachable("Illegal extract subvector for VEXTRACT");
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
MVT VecVT = N->getOperand(0).getSimpleValueType();
MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
}
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
llvm_unreachable("Illegal insert subvector for VINSERT");
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
MVT VecVT = N->getSimpleValueType(0);
MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
}
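// Illustrative example: extracting elements 4..7 of a v8f32 as a 128-bit
// subvector gives Index = 4 and NumElemsPerChunk = 128/32 = 4, so the
// resulting VEXTRACTF128 immediate is 4 / 4 == 1 (the upper half).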
/// getExtractVEXTRACT128Immediate - Return the appropriate immediate
/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
/// and VEXTRACTI128 instructions.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 128);
}
/// getExtractVEXTRACT256Immediate - Return the appropriate immediate
/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
/// and VEXTRACTI64x4 instructions.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 256);
}
/// getInsertVINSERT128Immediate - Return the appropriate immediate
/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
/// and VINSERTI128 instructions.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 128);
}
/// getInsertVINSERT256Immediate - Return the appropriate immediate
/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
/// and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
return CN->isNullValue();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
return CFP->getValueAPF().isPosZero();
return false;
}
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i != NumElems; ++i) {
int Idx = SVOp->getMaskElt(i);
if (Idx >= 0) {
if (Idx < (int)NumElems)
Idx += NumElems;
else
Idx -= NumElems;
}
MaskVec.push_back(Idx);
}
return DAG.getVectorShuffle(VT, SDLoc(SVOp), SVOp->getOperand(1),
SVOp->getOperand(0), &MaskVec[0]);
}
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i+2))
return false;
for (unsigned i = 2; i != 4; ++i)
if (!isUndefOrEqual(Mask[i], i+4))
return false;
return true;
}
/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
return false;
N = N->getOperand(0).getNode();
if (!ISD::isNON_EXTLoad(N))
return false;
if (LD)
*LD = cast<LoadSDNode>(N);
return true;
}
// Test whether the given value is a vector value which will be legalized
// into a load.
static bool WillBeConstantPoolLoad(SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
// Check for any non-constant elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
switch (N->getOperand(i).getNode()->getOpcode()) {
case ISD::UNDEF:
case ISD::ConstantFP:
case ISD::Constant:
break;
default:
return false;
}
// Vectors of all-zeros and all-ones are materialized with special
// instructions rather than being loaded.
return !ISD::isBuildVectorAllZeros(N) &&
!ISD::isBuildVectorAllOnes(N);
}
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// If V2 is a vector load, don't do this transformation. We will instead try
// to fold the load into a shufps op.
if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
return false;
unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i+NumElems))
return false;
return true;
}
/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
SDValue SplatValue = N->getOperand(0);
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
if (N->getOperand(i) != SplatValue)
return false;
return true;
}
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
static bool isZeroShuffle(ShuffleVectorSDNode *N) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
unsigned NumElems = N->getValueType(0).getVectorNumElements();
for (unsigned i = 0; i != NumElems; ++i) {
int Idx = N->getMaskElt(i);
if (Idx >= (int)NumElems) {
unsigned Opc = V2.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
continue;
if (Opc != ISD::BUILD_VECTOR ||
!X86::isZeroNode(V2.getOperand(Idx-NumElems)))
return false;
} else if (Idx >= 0) {
unsigned Opc = V1.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
continue;
if (Opc != ISD::BUILD_VECTOR ||
!X86::isZeroNode(V1.getOperand(Idx)))
return false;
}
}
return true;
}
/// getZeroVector - Returns a vector of the specified type with all zero elements.
///
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.is256BitVector()) { // AVX
if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
array_lengthof(Ops));
} else {
// 256-bit logic and arithmetic instructions in AVX are all
// floating-point, no support for integer ops. Emit fp zeroed vectors.
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
array_lengthof(Ops));
}
} else if (VT.is512BitVector()) { // AVX-512
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
} else
llvm_unreachable("Unexpected vector type");
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of the specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
array_lengthof(Ops));
} else { // AVX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
}
} else if (VT.is128BitVector()) {
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else
llvm_unreachable("Unexpected vector type");
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
if (Mask[i] > (int)NumElems) {
Mask[i] = NumElems;
}
}
}
/// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
/// operation of the specified width.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
Mask.push_back(NumElems);
for (unsigned i = 1; i != NumElems; ++i)
Mask.push_back(i);
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
Mask.push_back(i);
Mask.push_back(i + NumElems);
}
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
Mask.push_back(i + Half);
Mask.push_back(i + NumElems + Half);
}
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
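// Illustrative example: for v4i32, getUnpackl builds the mask <0, 4, 1, 5>
// and getUnpackh builds <2, 6, 3, 7>, interleaving the low and high halves
// of the two sources respectively.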
// PromoteSplati8i16 - i16 and i8 vector types can't be used directly by
// a generic shuffle instruction because the target has no such instructions.
// Generate shuffles which repeat i16 and i8 several times until they can be
// represented by v4f32 and then be manipulated by target supported shuffles.
static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
MVT VT = V.getSimpleValueType();
int NumElems = VT.getVectorNumElements();
SDLoc dl(V);
while (NumElems > 4) {
if (EltNo < NumElems/2) {
V = getUnpackl(DAG, dl, VT, V, V);
} else {
V = getUnpackh(DAG, dl, VT, V, V);
EltNo -= NumElems/2;
}
NumElems >>= 1;
}
return V;
}
/// getLegalSplat - Generate a legal splat with supported x86 shuffles
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
MVT VT = V.getSimpleValueType();
SDLoc dl(V);
if (VT.is128BitVector()) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
&SplatMask[0]);
} else if (VT.is256BitVector()) {
// To use VPERMILPS to splat scalars, the second half of indices must
// refer to the higher part, which is a duplication of the lower one,
// because VPERMILPS can only handle in-lane permutations.
int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
&SplatMask[0]);
} else
llvm_unreachable("Vector size not supported");
return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
/// PromoteSplat - Splat is promoted to target supported vector shuffles.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
MVT SrcVT = SV->getSimpleValueType(0);
SDValue V1 = SV->getOperand(0);
SDLoc dl(SV);
int EltNo = SV->getSplatIndex();
int NumElems = SrcVT.getVectorNumElements();
bool Is256BitVec = SrcVT.is256BitVector();
assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
"Unknown how to promote splat for type");
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
if (Is256BitVec) {
V1 = Extract128BitVector(V1, EltNo, DAG, dl);
if (EltNo >= NumElems/2)
EltNo -= NumElems/2;
}
// i16 and i8 vector types can't be used directly by a generic shuffle
// instruction because the target has no such instruction. Generate shuffles
// which repeat i16 and i8 several times until they fit in i32, and then can
// be manipulated by target supported shuffles.
MVT EltVT = SrcVT.getVectorElementType();
if (EltVT == MVT::i8 || EltVT == MVT::i16)
V1 = PromoteSplati8i16(V1, DAG, EltNo);
// Recreate the 256-bit vector and place the same 128-bit vector
// into the low and high part. This is necessary because we want
// to use VPERM* to shuffle the vectors
if (Is256BitVec) {
V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
}
return getLegalSplat(DAG, V1, EltNo);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and a zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool IsZero,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
// If this is the insertion idx, put the low elt of V2 here.
MaskVec.push_back(i == Idx ? NumElems : i);
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
}
/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
/// target specific opcode. Returns true if the Mask could be calculated.
/// Sets IsUnary to true if the node uses only one source.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
IsUnary = false;
switch(N->getOpcode()) {
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
break;
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, Mask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, Mask);
break;
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
break;
case X86ISD::PALIGNR:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::VPERMI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD: {
// Index 0 always comes from the first element of the second source;
// this is why MOVSS and MOVSD are used in the first place. The other
// elements come from the other positions of the first source vector.
Mask.push_back(NumElems);
for (unsigned i = 1; i != NumElems; ++i) {
Mask.push_back(i);
}
break;
}
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
break;
case X86ISD::MOVDDUP:
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
}
return true;
}
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
SDValue V = SDValue(N, 0);
EVT VT = V.getValueType();
unsigned Opcode = V.getOpcode();
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
int Elt = SV->getMaskElt(Index);
if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
unsigned NumElems = VT.getVectorNumElements();
SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
: SV->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
unsigned NumElems = ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt < 0)
return DAG.getUNDEF(ShufVT.getVectorElementType());
SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
: N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
// Actual nodes that may contain scalar elements
if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
return SDValue();
}
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
: DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
return SDValue();
}
/// getNumOfConsecutiveZeros - Return the number of elements of a vector
/// shuffle operation which come consecutively from zero. The search can
/// start in two different directions, from left or right.
/// We count undefs as zeros until PreferredNum is reached.
static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
unsigned NumElems, bool ZerosFromLeft,
SelectionDAG &DAG,
unsigned PreferredNum = -1U) {
unsigned NumZeros = 0;
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
if (!Elt.getNode())
break;
if (X86::isZeroNode(Elt))
++NumZeros;
else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
NumZeros = std::min(NumZeros + 1, PreferredNum);
else
break;
}
return NumZeros;
}
/// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
/// correspond consecutively to elements from one of the vector operands,
/// starting from its index OpIdx. Also sets OpNum to the source vector operand.
static
bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
unsigned MaskI, unsigned MaskE, unsigned OpIdx,
unsigned NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
int Idx = SVOp->getMaskElt(i);
// Ignore undef indices
if (Idx < 0)
continue;
if (Idx < (int)NumElems)
SeenV1 = true;
else
SeenV2 = true;
// Only accept consecutive elements from the same vector
if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
return false;
}
OpNum = SeenV1 ? 0 : 1;
return true;
}
/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
/// logical right shift of a vector.
static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
unsigned NumElems =
SVOp->getSimpleValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(
SVOp, NumElems, false /* check zeros from right */, DAG,
SVOp->getMaskElt(0));
unsigned OpSrc;
if (!NumZeros)
return false;
// Considering the elements in the mask that are not consecutive zeros,
// check if they consecutively come from only one of the source vectors.
//
// V1 = {X, A, B, C} 0
// \ \ \ /
// vector_shuffle V1, V2 <1, 2, 3, X>
//
if (!isShuffleMaskConsecutive(SVOp,
0, // Mask Start Index
NumElems-NumZeros, // Mask End Index(exclusive)
NumZeros, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
return false;
isLeft = false;
ShAmt = NumZeros;
ShVal = SVOp->getOperand(OpSrc);
return true;
}
/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
/// logical left shift of a vector.
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
unsigned NumElems =
SVOp->getSimpleValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(
SVOp, NumElems, true /* check zeros from left */, DAG,
NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
unsigned OpSrc;
if (!NumZeros)
return false;
// Considering the elements in the mask that are not consecutive zeros,
// check if they consecutively come from only one of the source vectors.
//
// 0 { A, B, X, X } = V2
// / \ / /
// vector_shuffle V1, V2 <X, X, 4, 5>
//
if (!isShuffleMaskConsecutive(SVOp,
NumZeros, // Mask Start Index
NumElems, // Mask End Index(exclusive)
0, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
return false;
isLeft = true;
ShAmt = NumZeros;
ShVal = SVOp->getOperand(OpSrc);
return true;
}
/// isVectorShift - Returns true if the shuffle can be implemented as a
/// logical left or right shift of a vector.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below supports any bitwidth size, there are no
// shift instructions which handle more than 128-bit vectors.
if (!SVOp->getSimpleValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
return true;
return false;
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
SDLoc dl(Op);
SDValue V(0, 0);
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
SDValue ThisElt(0, 0), LastElt(0, 0);
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt.getNode())
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i/2));
}
}
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 4)
return SDValue();
SDLoc dl(Op);
SDValue V(0, 0);
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
MVT::v8i16, V, Op.getOperand(i),
DAG.getIntPtrConstant(i));
}
}
return V;
}
/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, SDLoc dl) {
assert(VT.is128BitVector() && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
DAG.getConstant(NumBits,
TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
static SDValue
LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
// the shuffle mask.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
SDValue Ptr = LD->getBasePtr();
if (!ISD::isNormalLoad(LD) || LD->isVolatile())
return SDValue();
EVT PVT = LD->getValueType(0);
if (PVT != MVT::i32 && PVT != MVT::f32)
return SDValue();
int FI = -1;
int64_t Offset = 0;
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
} else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
Ptr = Ptr.getOperand(0);
} else {
return SDValue();
}
// FIXME: 256-bit vector instructions don't require a strict alignment,
// improve this code to support it better.
unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
// Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI->isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjusted offset instead,
// if someone *really* cares about this; that's the way to implement it.
return SDValue();
} else {
MFI->setObjectAlignment(FI, RequiredAlign);
}
}
// (Offset % 16 or 32) must be a multiple of 4. The address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
if ((Offset % RequiredAlign) & 3)
return SDValue();
int64_t StartOffset = Offset & ~(RequiredAlign-1);
if (StartOffset)
Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
Ptr, DAG.getConstant(StartOffset, Ptr.getValueType()));
int EltNo = (Offset - StartOffset) >> 2;
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumElems; ++i)
Mask.push_back(EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
return SDValue();
}
/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
/// vector of type 'VT', see if the elements can be replaced by a single large
/// load which has the same value as a build_vector whose operands are 'elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDLoc &DL, SelectionDAG &DAG,
bool isAfterLegalize) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
LoadSDNode *LDBase = NULL;
unsigned LastLoadedElt = -1U;
// For each element in the initializer, see if we've found a load or an undef.
// If we don't find an initial load element, or later load elements are
// non-consecutive, bail out.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
if (!LDBase) {
if (Elt.getNode()->getOpcode() == ISD::UNDEF)
return SDValue();
LDBase = cast<LoadSDNode>(Elt.getNode());
LastLoadedElt = i;
continue;
}
if (Elt.getOpcode() == ISD::UNDEF)
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
return SDValue();
LastLoadedElt = i;
}
// If we have found an entire vector of loads and undefs, then return a large
// load of the entire vector width starting at the base pointer. If we found
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
if (isAfterLegalize &&
!DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
return SDValue();
SDValue NewLd = SDValue();
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->isInvariant(), 0);
else
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->isInvariant(), LDBase->getAlignment());
if (LDBase->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
SDValue(LDBase, 1),
SDValue(NewLd.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
SDValue(NewLd.getNode(), 1));
}
return NewLd;
}
if (NumElems == 4 && LastLoadedElt == 1 &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
array_lengthof(Ops), MVT::i64,
LDBase->getPointerInfo(),
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
// Make sure the newly-created LOAD is in the same position as LDBase in
// terms of dependency. We create a TokenFactor for LDBase and ResNode, and
// update uses of LDBase's output chain to use the TokenFactor.
if (LDBase->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
SDValue(ResNode.getNode(), 1));
}
return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
}
/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
/// to generate a splat value for the following cases:
/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
/// a scalar load, or a constant.
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
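/// e.g. (v8f32 (build_vector (load x), (load x), ..., (load x))) can be
/// lowered to (v8f32 (VBROADCAST (load x))) when all uses of the load are
/// the build_vector itself.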
static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
if (!Subtarget->hasFp256())
return SDValue();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
SDValue Ld;
bool ConstSplatVal;
switch (Op.getOpcode()) {
default:
// Unknown pattern found.
return SDValue();
case ISD::BUILD_VECTOR: {
// The BUILD_VECTOR node must be a splat.
if (!isSplatVector(Op.getNode()))
return SDValue();
Ld = Op.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
Ld.getOpcode() == ISD::ConstantFP);
// The suspected load node has several users. Make sure that all
// of its users are from the BUILD_VECTOR node.
// Constants may have multiple users.
if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
return SDValue();
break;
}
case ISD::VECTOR_SHUFFLE: {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// Shuffles must have a splat mask where the first element is
// broadcasted.
if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
return SDValue();
SDValue Sc = Op.getOperand(0);
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
Sc.getOpcode() != ISD::BUILD_VECTOR) {
if (!Subtarget->hasInt256())
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
if (VT.getSizeInBits() >= 256)
Sc = Extract128BitVector(Sc, 0, DAG, dl);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
}
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
Ld.getOpcode() == ISD::ConstantFP);
// The scalar_to_vector node and the suspected
// load node must have exactly one user.
// Constants may have multiple users.
// AVX-512 has a register version of the broadcast.
bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
Ld.getValueType().getSizeInBits() >= 32;
if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
!hasRegVer))
return SDValue();
break;
}
}
bool IsGE256 = (VT.getSizeInBits() >= 256);
// Handle broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge (AVX1) it is still better to load a constant
// vector from the constant pool than to broadcast it from a scalar.
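// e.g. with AVX2, a v8f32 splat of 1.0f becomes a vbroadcastss from a 4-byte
// constant pool entry instead of a 32-byte constant vector load.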
if (ConstSplatVal && Subtarget->hasInt256()) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
unsigned ScalarSize = CVT.getSizeInBits();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
const Constant *C = 0;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
C = CF->getConstantFPValue();
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
}
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget->hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The scalar source must be a normal load.
if (!IsLoad)
return SDValue();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit-into-128-bit case, so that it
// doesn't match f64: there is no vbroadcastsd to an xmm register.
if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
// Unsupported broadcast.
return SDValue();
}
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// Skip if insert_vec_elt is not supported.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
SDLoc DL(Op);
unsigned NumElems = Op.getNumOperands();
SDValue VecIn1;
SDValue VecIn2;
SmallVector<unsigned, 4> InsertIndices;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Opc = Op.getOperand(i).getOpcode();
if (Opc == ISD::UNDEF)
continue;
if (Opc != ISD::EXTRACT_VECTOR_ELT) {
// Quit if more than 1 element needs inserting.
if (InsertIndices.size() > 1)
return SDValue();
InsertIndices.push_back(i);
continue;
}
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
// Quit if extracted from a vector of a different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
if (VecIn1.getNode() == 0)
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
if (VecIn2.getNode() == 0)
VecIn2 = ExtractedFromVec;
else if (VecIn2 != ExtractedFromVec)
// Quit if more than 2 vectors to shuffle
return SDValue();
}
unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)
Mask[i] = Idx + NumElems;
}
if (VecIn1.getNode() == 0)
return SDValue();
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
unsigned Idx = InsertIndices[i];
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx));
}
return NV;
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
Ops, VT.getVectorNumElements());
}
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
Ops, VT.getVectorNumElements());
}
bool AllConstants = true;
uint64_t Immediate = 0;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
if (!isa<ConstantSDNode>(In)) {
AllConstants = false;
break;
}
if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
}
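// e.g. for a v8i1 build_vector of constants (1,0,1,0,0,0,0,0), Immediate
// becomes 0b101; the i16 immediate is bitcast to v16i1 and the low v8i1
// subvector is extracted.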
if (AllConstants) {
SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
DAG.getConstant(Immediate, MVT::i16));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
DAG.getIntPtrConstant(0));
}
// Splat vector (with undefs)
SDValue In = Op.getOperand(0);
for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
llvm_unreachable("Unsupported predicate operation");
}
SDValue EFLAGS, X86CC;
if (In.getOpcode() == ISD::SETCC) {
SDValue Op0 = In.getOperand(0);
SDValue Op1 = In.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(In.getOperand(2))->get();
bool isFP = Op1.getValueType().isFloatingPoint();
unsigned X86CCVal = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
assert(X86CCVal != X86::COND_INVALID && "Unsupported predicate operation");
X86CC = DAG.getConstant(X86CCVal, MVT::i8);
EFLAGS = EmitCmp(Op0, Op1, X86CCVal, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
} else if (In.getOpcode() == X86ISD::SETCC) {
X86CC = In.getOperand(0);
EFLAGS = In.getOperand(1);
} else {
// The algorithm:
// Bit1 = In & 0x1
// if (Bit1 != 0)
// ZF = 0
// else
// ZF = 1
// if (ZF == 0)
// res = allOnes ### CMOVNE -1, %res
// else
// res = allZero
MVT InVT = In.getSimpleValueType();
SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT));
EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG);
X86CC = DAG.getConstant(X86::COND_NE, MVT::i8);
}
if (VT == MVT::v16i1) {
SDValue Cst1 = DAG.getConstant(-1, MVT::i16);
SDValue Cst0 = DAG.getConstant(0, MVT::i16);
SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i16,
Cst0, Cst1, X86CC, EFLAGS);
return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp);
}
if (VT == MVT::v8i1) {
SDValue Cst1 = DAG.getConstant(-1, MVT::i32);
SDValue Cst0 = DAG.getConstant(0, MVT::i32);
SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i32,
Cst0, Cst1, X86CC, EFLAGS);
CmovOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CmovOp);
return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp);
}
llvm_unreachable("Unsupported predicate operation");
}
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Generate vectors for predicate vectors.
if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
// Vectors containing all zeros can be matched by pxor and xorps later
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getZeroVector(VT, Subtarget, DAG, dl);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
if (!VT.is512BitVector())
return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (Broadcast.getNode())
return Broadcast;
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
if (X86::isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
// All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NumNonZero == 0)
return DAG.getUNDEF(VT);
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
unsigned Idx = countTrailingZeros(NonZeros);
SDValue Item = Op.getOperand(Idx);
// If this is an insertion of an i64 value on x86-32, and if the top bits of
// the value are obviously zero, truncate the value to i32 and do the
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
EVT VecVT = MVT::v4i32;
unsigned VecElts = 4;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
if (Idx != 0) {
SmallVector<int, 4> Mask;
Mask.push_back(Idx);
for (unsigned i = 1; i != VecElts; ++i)
Mask.push_back(i);
Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
&Mask[0]);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is.
if (Idx == 0) {
if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
if (VT.is256BitVector() || VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
// Is it a vector logical left shift?
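// e.g. (v2i64 build_vector 0, x) equals x shifted left by 64 bits across the
// whole 128-bit register, i.e. a pslldq by 8 bytes of (scalar_to_vector x).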
if (NumElems == 2 && Idx == 1 &&
X86::isZeroNode(Op.getOperand(0)) &&
!X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
VT, Op.getOperand(1)),
NumBits/2, DAG, *this, dl);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
return SDValue();
// Otherwise, if this is a vector with i32 or f32 elements, and the element
// is a non-constant being inserted into an element other than the low one,
// we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
// movd/movss) to move this into the low element, then shuffle it into
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
MaskVec.push_back(i == Idx ? 0 : 1);
return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1) {
if (EVTBits == 32) {
// Instead of a shuffle like this:
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// check if it's possible to issue this instead:
// shuffle (vload ptr), undef, <1, 1, 1, 1>
unsigned Idx = countTrailingZeros(NonZeros);
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
}
return SDValue();
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
return SDValue();
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.is256BitVector()) {
SmallVector<SDValue, 32> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// Build both the lower and upper subvector.
SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
NumElems/2);
// Recreate the wider vector with the lower and upper part.
return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = countTrailingZeros(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
Subtarget, *this);
if (V.getNode()) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
Subtarget, *this);
if (V.getNode()) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDValue, 8> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
V[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
break;
case 2:
V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
break;
case 3:
V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
break;
}
}
bool Reverse1 = (NonZeros & 0x3) == 2;
bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
static_cast<int>(Reverse2 ? NumElems : NumElems+1)
};
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
if (LD.getNode())
return LD;
// Check for a build vector built mostly from a shuffle plus a few inserts.
SDValue Sh = buildFromShuffleMostly(Op, DAG);
if (Sh.getNode())
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i));
}
return Result;
}
// Otherwise, expand into a number of unpckl*. Start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
for (unsigned i = 0; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
else
V[i] = DAG.getUNDEF(VT);
}
// Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
unsigned EltStride = NumElems >> 1;
while (EltStride != 0) {
for (unsigned i = 0; i < EltStride; ++i) {
// If V[i+EltStride] is undef and this is the first round of mixing,
// then it is safe to just drop this shuffle: V[i] is already in the
// right place, the one element (since it's the first round) being
// inserted as undef can be dropped. This isn't safe for successive
// rounds because they will permute elements within both vectors.
if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
EltStride == NumElems/2)
continue;
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
}
EltStride >>= 1;
}
return V[0];
}
return SDValue();
}
// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
assert((ResVT.is256BitVector() ||
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
if (ResVT.is256BitVector())
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 2);
// AVX/AVX-512 can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
// Try to lower a shuffle node into a simple blend instruction.
static SDValue
LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
MVT VT = SVOp->getSimpleValueType(0);
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
// There is no blend with immediate in AVX-512.
if (VT.is512BitVector())
return SDValue();
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
return SDValue();
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
return SDValue();
// Check the mask for BLEND and build the value.
unsigned MaskValue = 0;
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
unsigned NumLanes = (NumElems-1)/8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
// The blend mask for v16i16 must be symmetric across both lanes.
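// e.g. for a v8i16 shuffle mask <0,9,2,11,4,13,6,15>, elements 1,3,5,7 are
// taken from V2, so MaskValue ends up as 0b10101010.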
for (unsigned i = 0; i < NumElemsInLane; ++i) {
int SndLaneEltIdx = (NumLanes == 2) ?
SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
if ((EltIdx < 0 || EltIdx == (int)i) &&
(SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
continue;
if (((unsigned)EltIdx == (i + NumElems)) &&
(SndLaneEltIdx < 0 ||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
MaskValue |= (1<<i);
else
return SDValue();
}
// Convert i32 vectors to floating point if AVX2 is not available.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
MVT BlendVT = VT;
if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
NumElems);
V1 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V2);
}
SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
// 3. [ssse3] 2 x pshufb + 1 x por
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
static SDValue
LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
SmallVector<int, 8> MaskVals;
// Determine if more than 1 of the words in each of the low and high quadwords
// of the result come from the same quadword of one of the two inputs. Undef
// mask values count as coming from any quadword, for better codegen.
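// e.g. for mask <0,1,2,3,8,9,10,11>, every low-half word comes from quad 0
// (V1's low quad) and every high-half word from quad 2 (V2's low quad), so
// the two quads can first be packed together with a single v2i64 shuffle.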
unsigned LoQuad[] = { 0, 0, 0, 0 };
unsigned HiQuad[] = { 0, 0, 0, 0 };
std::bitset<4> InputQuads;
for (unsigned i = 0; i < 8; ++i) {
unsigned *Quad = i < 4 ? LoQuad : HiQuad;
int EltIdx = SVOp->getMaskElt(i);
MaskVals.push_back(EltIdx);
if (EltIdx < 0) {
++Quad[0];
++Quad[1];
++Quad[2];
++Quad[3];
continue;
}
++Quad[EltIdx / 4];
InputQuads.set(EltIdx / 4);
}
int BestLoQuad = -1;
unsigned MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (LoQuad[i] > MaxQuad) {
BestLoQuad = i;
MaxQuad = LoQuad[i];
}
}
int BestHiQuad = -1;
MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (HiQuad[i] > MaxQuad) {
BestHiQuad = i;
MaxQuad = HiQuad[i];
}
}
// For SSSE3, if all 8 words of the result come from only 1 quadword of each
// of the two input vectors, shuffle them into one input vector so only a
// single pshufb instruction is necessary. If there are more than 2 input
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads[0] ? 0 : 1;
BestHiQuad = InputQuads[2] ? 2 : 3;
}
if (InputQuads.count() > 2) {
BestLoQuad = -1;
BestHiQuad = -1;
}
}
// If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
// the shuffle mask. If a quad is scored as -1, that means that it contains
// words from all 4 input quadwords.
SDValue NewV;
if (BestLoQuad >= 0 || BestHiQuad >= 0) {
int MaskV[] = {
BestLoQuad < 0 ? 0 : BestLoQuad,
BestHiQuad < 0 ? 1 : BestHiQuad
};
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
// Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
// source words for the shuffle, to aid later transformations.
bool AllWordsInNewV = true;
bool InOrder[2] = { true, true };
for (unsigned i = 0; i != 8; ++i) {
int idx = MaskVals[i];
if (idx != (int)i)
InOrder[i/4] = false;
if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
continue;
AllWordsInNewV = false;
break;
}
bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
if (AllWordsInNewV) {
for (int i = 0; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0)
continue;
idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
if ((idx != i) && idx < 4)
pshufhw = false;
if ((idx != i) && idx > 3)
pshuflw = false;
}
V1 = NewV;
V2Used = false;
BestLoQuad = 0;
BestHiQuad = 1;
}
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
unsigned TargetMask = 0;
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
getShufflePSHUFLWImmediate(SVOp);
V1 = NewV.getOperand(0);
return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
}
// Promote splats to a larger type which usually leads to more efficient code.
// FIXME: Is this true if pshufb is available?
if (SVOp->isSplat())
return PromoteSplat(SVOp, DAG);
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
// shuffle mask element to zero out elements that come from V2 in the V1
// mask, and elements that come from V1 in the V2 mask, so that the two
// results can be OR'd together.
bool TwoInputs = V1Used && V2Used;
for (unsigned i = 0; i != 8; ++i) {
int EltIdx = MaskVals[i] * 2;
int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
pshufbMask.clear();
for (unsigned i = 0; i != 8; ++i) {
int EltIdx = MaskVals[i] * 2;
int Idx0 = (EltIdx < 16) ? 0x80 : EltIdx - 16;
int Idx1 = (EltIdx < 16) ? 0x80 : EltIdx - 15;
pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
}
// If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
// and update MaskVals with new element order.
std::bitset<8> InOrder;
if (BestLoQuad >= 0) {
int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
for (int i = 0; i != 4; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
InOrder.set(i);
} else if ((idx / 4) == BestLoQuad) {
MaskV[i] = idx & 3;
InOrder.set(i);
}
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
NewV.getOperand(0),
getShufflePSHUFLWImmediate(SVOp), DAG);
}
}
// If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
// and update MaskVals with the new element order.
if (BestHiQuad >= 0) {
int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
for (unsigned i = 4; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
InOrder.set(i);
} else if ((idx / 4) == BestHiQuad) {
MaskV[i] = (idx & 3) + 4;
InOrder.set(i);
}
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
NewV.getOperand(0),
getShufflePSHUFHWImmediate(SVOp), DAG);
}
}
// In case BestHiQuad & BestLoQuad were both -1, which means each quadword has
// a word from each of the four input quadwords, calculate the InOrder
// bitvector now before falling through to the insert/extract cleanup.
if (BestLoQuad == -1 && BestHiQuad == -1) {
NewV = V1;
for (int i = 0; i != 8; ++i)
if (MaskVals[i] < 0 || MaskVals[i] == i)
InOrder.set(i);
}
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
continue;
int EltIdx = MaskVals[i];
if (EltIdx < 0)
continue;
SDValue ExtOp = (EltIdx < 8) ?
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
DAG.getIntPtrConstant(EltIdx)) :
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
DAG.getIntPtrConstant(EltIdx - 8));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
DAG.getIntPtrConstant(i));
}
return NewV;
}
// v16i8 shuffles - Prefer shuffles in the following order:
// 1. [ssse3] 1 x pshufb
// 2. [ssse3] 2 x pshufb + 1 x por
// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
ArrayRef<int> MaskVals = SVOp->getMask();
// Promote splats to a larger type which usually leads to more efficient code.
// FIXME: Is this true if pshufb is available?
if (SVOp->isSplat())
return PromoteSplat(SVOp, DAG);
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
// undef mask values to 0x80 (zero out result) in the pshufb mask.
//
// Otherwise, we have elements from both input vectors, and must zero out
// elements that come from V2 in the first mask, and V1 in the second mask
// so that we can OR them together.
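// e.g. mask element 5 (taken from V1) becomes pshufb control byte 5, while
// an undef element, or one taken from V2, becomes 0x80, which zeroes that
// byte of this shuffle's result.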
for (unsigned i = 0; i != 16; ++i) {
int EltIdx = MaskVals[i];
if (EltIdx < 0 || EltIdx >= 16)
EltIdx = 0x80;
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
}
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
// As PSHUFB will zero elements with negative indices, it's safe to ignore
// the 2nd operand if it's undefined or zero.
if (V2.getOpcode() == ISD::UNDEF ||
ISD::isBuildVectorAllZeros(V2.getNode()))
return V1;
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
pshufbMask.clear();
for (unsigned i = 0; i != 16; ++i) {
int EltIdx = MaskVals[i];
EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
}
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
}
// No SSSE3 - calculate the in-place words, then fix all out-of-place words
// with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
// the 16 different words that comprise the two doublequadword input vectors.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
SDValue NewV = V1;
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
int Elt1 = MaskVals[i*2+1];
// This word of the result is all undef, skip it.
if (Elt0 < 0 && Elt1 < 0)
continue;
// This word of the result is already in the correct place, skip it.
if ((Elt0 == i*2) && (Elt1 == i*2+1))
continue;
SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
SDValue InsElt;
// If Elt0 and Elt1 are defined, are consecutive, and can be loaded
// together with a single extract, do the extract and insert the word.
if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
DAG.getIntPtrConstant(Elt1 / 2));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
DAG.getIntPtrConstant(i));
continue;
}
// If Elt1 is defined, extract it from the appropriate source. If the
// source byte is not also odd, shift the extracted word left 8 bits;
// otherwise clear the bottom 8 bits if we need to do an OR.
if (Elt1 >= 0) {
InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
DAG.getIntPtrConstant(Elt1 / 2));
if ((Elt1 & 1) == 0)
InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
DAG.getConstant(8,
TLI.getShiftAmountTy(InsElt.getValueType())));
else if (Elt0 >= 0)
InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
DAG.getConstant(0xFF00, MVT::i16));
}
// If Elt0 is defined, extract it from the appropriate source. If the
// source byte is not also even, shift the extracted word right 8 bits. If
// Elt1 was also defined, OR the extracted values together before
// inserting them in the result.
if (Elt0 >= 0) {
SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
if ((Elt0 & 1) != 0)
InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
DAG.getConstant(8,
TLI.getShiftAmountTy(InsElt0.getValueType())));
else if (Elt1 >= 0)
InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
DAG.getConstant(0x00FF, MVT::i16));
InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
: InsElt0;
}
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
DAG.getIntPtrConstant(i));
}
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
// v32i8 shuffles - Translate to VPSHUFB if possible.
static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = SVOp->getSimpleValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
// VPSHUFB may be generated if
// (1) one of the input vectors is undefined or a zeroinitializer (the mask
// value 0x80 puts 0 in the corresponding slot of the vector), and
// (2) the mask indexes don't cross a 128-bit lane.
if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
(!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
return SDValue();
if (V1IsAllZero && !V2IsAllZero) {
CommuteVectorShuffleMask(MaskVals, 32);
V1 = V2;
}
SmallVector<SDValue, 32> pshufbMask;
for (unsigned i = 0; i != 32; i++) {
int EltIdx = MaskVals[i];
if (EltIdx < 0 || EltIdx >= 32)
EltIdx = 0x80;
else {
if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
// Cross lane is not allowed.
return SDValue();
EltIdx &= 0xf;
}
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
}
return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v32i8, &pshufbMask[0], 32));
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements points to elements in
/// the right sequence. e.g.
/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
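/// is rewritten as the v4i32 shuffle <1, 5, 0, 7>, each pair collapsing
/// into one wider element.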
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
MVT VT = SVOp->getSimpleValueType(0);
SDLoc dl(SVOp);
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
unsigned Scale;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
}
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i != NumElems; i += Scale) {
int StartIdx = -1;
for (unsigned j = 0; j != Scale; ++j) {
int EltIdx = SVOp->getMaskElt(i+j);
if (EltIdx < 0)
continue;
if (StartIdx < 0)
StartIdx = (EltIdx / Scale);
if (EltIdx != (int)(StartIdx*Scale + j))
return SDValue();
}
MaskVec.push_back(StartIdx);
}
SDValue V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(0));
SDValue V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(1));
return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
}
/// getVZextMovL - Return a zero-extending vector move low node.
///
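/// The resulting VZEXT_MOVL copies the low element of SrcOp and zeroes all
/// higher elements of the destination vector.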
static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, SDLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
LoadSDNode *LD = NULL;
if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
LD = dyn_cast<LoadSDNode>(SrcOp);
if (!LD) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
OpVT,
SrcOp.getOperand(0)
.getOperand(0))));
}
}
}
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl,
OpVT, SrcOp)));
}
/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vector shuffles
/// which could not be matched by any known target specific shuffle.
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
if (NewOp.getNode())
return NewOp;
MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
SDLoc dl(SVOp);
MVT EltVT = VT.getVectorElementType();
MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
SDValue Output[2];
SmallVector<int, 16> Mask;
for (unsigned l = 0; l < 2; ++l) {
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands (recorded in InputUsed).
// If building a suitable shuffle vector proves too hard, then bail
// out with UseBuildVector set.
bool UseBuildVector = false;
int InputUsed[2] = { -1, -1 }; // Not yet discovered.
unsigned LaneStart = l * NumLaneElems;
for (unsigned i = 0; i != NumLaneElems; ++i) {
// The mask element. This indexes into the input.
int Idx = SVOp->getMaskElt(i+LaneStart);
if (Idx < 0) {
// the mask element does not index into any input vector.
Mask.push_back(-1);
continue;
}
// The input vector this mask element indexes into.
int Input = Idx / NumLaneElems;
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NumLaneElems;
// Find or create a shuffle vector operand to hold this input.
unsigned OpNo;
for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
if (InputUsed[OpNo] == Input)
// This input vector is already an operand.
break;
if (InputUsed[OpNo] < 0) {
// Create a new operand for this input vector.
InputUsed[OpNo] = Input;
break;
}
}
if (OpNo >= array_lengthof(InputUsed)) {
// More than two input vectors used! Give up on trying to create a
// shuffle vector. Insert all elements into a BUILD_VECTOR instead.
UseBuildVector = true;
break;
}
// Add the mask index for the new shuffle vector.
Mask.push_back(Idx + OpNo * NumLaneElems);
}
if (UseBuildVector) {
SmallVector<SDValue, 16> SVOps;
for (unsigned i = 0; i != NumLaneElems; ++i) {
// The mask element. This indexes into the input.
int Idx = SVOp->getMaskElt(i+LaneStart);
if (Idx < 0) {
SVOps.push_back(DAG.getUNDEF(EltVT));
continue;
}
// The input vector this mask element indexes into.
int Input = Idx / NumElems;
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NumElems;
// Extract the vector element by hand.
SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
SVOp->getOperand(Input),
DAG.getIntPtrConstant(Idx)));
}
// Construct the output using a BUILD_VECTOR.
Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0],
SVOps.size());
} else if (InputUsed[0] < 0) {
// No input vectors were used! The result is undefined.
Output[l] = DAG.getUNDEF(NVT);
} else {
SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
(InputUsed[0] % 2) * NumLaneElems,
DAG, dl);
// If only one input was used, use an undefined vector for the other.
SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
(InputUsed[1] % 2) * NumLaneElems, DAG, dl);
// At least one input vector was used. Create a new shuffle vector.
Output[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
}
Mask.clear();
}
// Concatenate the two halves back together.
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Output[0], Output[1]);
}
/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
/// 4 elements, and match them with several different shuffle types.
static SDValue
LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
MVT VT = SVOp->getSimpleValueType(0);
assert(VT.is128BitVector() && "Unsupported vector size");
std::pair<int, int> Locs[4];
int Mask1[] = { -1, -1, -1, -1 };
SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
unsigned NumHi = 0;
unsigned NumLo = 0;
for (unsigned i = 0; i != 4; ++i) {
int Idx = PermMask[i];
if (Idx < 0) {
Locs[i] = std::make_pair(-1, -1);
} else {
assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
if (Idx < 4) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Idx;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < 4)
Mask1[2+NumHi] = Idx;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
// No more than two elements come from either vector. This can be
// implemented with two shuffles. The first shuffle gathers the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, puts the elements into the right order.
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
int Mask2[] = { -1, -1, -1, -1 };
for (unsigned i = 0; i != 4; ++i)
if (Locs[i].first != -1) {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = Idx;
}
return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
}
if (NumLo == 3 || NumHi == 3) {
// Otherwise, we must have three elements from one vector, call it X, and
// one element from the other, call it Y. First, use a shufps to build an
// intermediate vector with the one element from Y and the element from X
// that will be in the same half in the final destination (the indexes don't
// matter). Then, use a shufps to build the final vector, taking the half
// containing the element from Y from the intermediate, and the other half
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
// Find the element from V2.
unsigned HiIndex;
for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
int Val = PermMask[HiIndex];
if (Val < 0)
continue;
if (Val >= 4)
break;
}
Mask1[0] = PermMask[HiIndex];
Mask1[1] = -1;
Mask1[2] = PermMask[HiIndex^1];
Mask1[3] = -1;
V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
if (HiIndex >= 2) {
Mask1[0] = PermMask[0];
Mask1[1] = PermMask[1];
Mask1[2] = HiIndex & 1 ? 6 : 4;
Mask1[3] = HiIndex & 1 ? 4 : 6;
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
}
Mask1[0] = HiIndex & 1 ? 2 : 0;
Mask1[1] = HiIndex & 1 ? 0 : 2;
Mask1[2] = PermMask[2];
Mask1[3] = PermMask[3];
if (Mask1[2] >= 0)
Mask1[2] += 4;
if (Mask1[3] >= 0)
Mask1[3] += 4;
return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
int LoMask[] = { -1, -1, -1, -1 };
int HiMask[] = { -1, -1, -1, -1 };
int *MaskPtr = LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
if (i == 2) {
MaskPtr = HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
}
int Idx = PermMask[i];
if (Idx < 0) {
Locs[i] = std::make_pair(-1, -1);
} else if (Idx < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
MaskPtr[LoIdx] = Idx;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
MaskPtr[HiIdx] = Idx;
HiIdx++;
}
}
SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
int MaskOps[] = { -1, -1, -1, -1 };
for (unsigned i = 0; i != 4; ++i)
if (Locs[i].first != -1)
MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
static bool MayFoldVectorLoad(SDValue V) {
while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
// BUILD_VECTOR (load), undef
V = V.getOperand(0);
return MayFoldLoad(V);
}
static
SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// Canonicalize to v2f64.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
return DAG.getNode(ISD::BITCAST, dl, VT,
getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
V1, DAG));
}
static
SDValue getMOVLowToHigh(SDValue &Op, SDLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
if (HasSSE2 && VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
// v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
return DAG.getNode(ISD::BITCAST, dl, VT,
getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
}
static
SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
"unsupported shuffle type");
if (V2.getOpcode() == ISD::UNDEF)
V2 = V1;
// v4i32 or v4f32
return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
}
static
SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
// Use MOVLPS and MOVLPD if V1 or V2 is a load. During isel, the second
// operand of these instructions can only be memory, so check if there's a
// potential load folding here; otherwise use SHUFPS or MOVSD to match the
// same masks.
bool CanFoldLoad = false;
// Trivial case, when V2 comes from a load.
if (MayFoldVectorLoad(V2))
CanFoldLoad = true;
// When V1 is a load, it can be folded later into a store in isel, example:
// (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
// turns into:
// (MOVLPSmr addr:$src1, VR128:$src2)
// So, recognize this potential and also use MOVLPS or MOVLPD
else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
CanFoldLoad = true;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
if (CanFoldLoad) {
if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
if (NumElems == 4)
// If we don't care about the second element, proceed to use movss.
if (SVOp->getMaskElt(1) != -1)
return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
}
// movl and movlp will both match v2i64, but v2i64 is never matched by movl
// earlier because we keep it strict to avoid messing with the movlp load
// folding logic (see the code above the getMOVLP call). So match it here;
// this is horrible, but will stay like this until we move all shuffle
// matching to x86-specific nodes. Note that for the 1st condition all
// types are matched with movsd.
if (HasSSE2) {
// FIXME: isMOVLMask should be checked and matched before getMOVLP,
// as to remove this logic from here, as much as possible
if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
}
assert(VT != MVT::v4i32 && "unsupported shuffle type");
// Invert the operand order and use SHUFPS to match it.
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
getShuffleSHUFImmediate(SVOp), DAG);
}
// Reduce a vector shuffle to zext.
static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
MVT VT = Op.getSimpleValueType();
// Only AVX2 supports 256-bit vector integer extension.
if (!Subtarget->hasInt256() && VT.is256BitVector())
return SDValue();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDLoc DL(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
// Extension is a unary operation, and the element type of the source vector
// must be smaller than i64.
if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
VT.getVectorElementType() == MVT::i64)
return SDValue();
// Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
while ((1U << Shift) < NumElems) {
if (SVOp->getMaskElt(1U << Shift) == 1)
break;
Shift += 1;
// The maximal ratio is 8, i.e. from i8 to i64.
if (Shift > 3)
return SDValue();
}
// Check the shuffle mask.
unsigned Mask = (1U << Shift) - 1;
for (unsigned i = 0; i != NumElems; ++i) {
int EltIdx = SVOp->getMaskElt(i);
if ((i & Mask) != 0 && EltIdx != -1)
return SDValue();
if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
return SDValue();
}
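// Illustrative example: a v8i16 shuffle of V1 with undef V2 and mask
// <0,-1,1,-1,2,-1,3,-1> passes the checks above with Shift == 1 (ratio 2);
// it is rewritten below as a v4i32 VZEXT of V1, the undef odd lanes
// legitimately becoming the zero bits of the extension.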
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
MVT NeVT = MVT::getIntegerVT(NBits);
MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift);
if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
// Simplify the operand before it is fed into the shuffle.
unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
if (V1.getOpcode() == ISD::BITCAST &&
V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V1.getOperand(0).getOperand(0)
.getSimpleValueType().getSizeInBits() == SignificantBits) {
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
ConstantSDNode *CIdx =
dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
// If it's foldable, i.e. a normal load with a single use, let instruction
// selection fold it. Otherwise, shorten the conversion sequence here.
if (CIdx && CIdx->getZExtValue() == 0 &&
(!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
MVT FullVT = V.getSimpleValueType();
MVT V1VT = V1.getSimpleValueType();
if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
// The "ext_vec_elt" node is wider than the result node.
// In this case we should extract a subvector from V.
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
FullVT.getVectorNumElements()/Ratio);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
DAG.getIntPtrConstant(0));
}
V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
}
}
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
static SDValue
NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget, DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
// Use vbroadcast whenever the splat comes from a foldable load
SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (Broadcast.getNode())
return Broadcast;
}
// Check integer expanding shuffles.
SDValue NewOp = LowerVectorIntExtend(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
VT == MVT::v16i16 || VT == MVT::v32i8) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
(VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
MVT NewVT = NewOp.getSimpleValueType();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
MVT NewVT = NewOp.getSimpleValueType();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
}
}
}
return SDValue();
}
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasSSE2 = Subtarget->hasSSE2();
bool HasFp256 = Subtarget->hasFp256();
bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
if (V1IsUndef && V2IsUndef)
return DAG.getUNDEF(VT);
assert(!V1IsUndef && "Op 1 of shuffle should not be undef");
// Vector shuffle lowering takes 3 steps:
//
// 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled.
// 2) Matching of shuffles with known shuffle masks to x86 target specific
// shuffle nodes.
// 3) Rewriting of unmatched masks into new generic shuffle operations,
// so the shuffle can be broken into other shuffles and the legalizer can
// try the lowering again.
//
// The general idea is that no vector_shuffle operation should be left to
// be matched during isel, all of them must be converted to a target specific
// node here.
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those, work in progress...
SDValue NewOp = NormalizeVectorShuffle(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
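// (unpck{l,h} of a register with itself needs no immediate byte, so it
// typically encodes smaller than pshufd; hence the size preference.)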
if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
V2IsUndef && MayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
// Used to match splats.
if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (isPSHUFDMask(M, VT)) {
// The mask matched by the if above can, during isel, map to several
// different instructions, not only pshufd as the name says. Sad but true;
// emulate that behavior for now...
if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
DAG);
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
if (isPALIGNRMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
if (isMOVLMask(M, VT)) {
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!isMOVLPMask(M, VT)) {
if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
if (VT == MVT::v4i32 || VT == MVT::v4f32)
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
}
}
// FIXME: fold these into legal mask.
if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
if (isMOVHLPSMask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
if (isMOVLPMask(M, VT))
return getMOVLP(Op, dl, DAG, HasSSE2);
if (ShouldXformToMOVHLPS(M, VT) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
return CommuteVectorShuffle(SVOp, DAG);
if (isShift) {
// No better options. Use a vshldq / vsrldq.
MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.getNode());
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
CommuteVectorShuffleMask(M, NumElems);
std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
Commuted = true;
}
if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling the low element of V1 into undef; just return V1.
if (V2IsUndef)
return V1;
// If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
// the instruction selector will not match, so get a canonical MOVL with
// swapped operands to undo the commute.
return getMOVL(DAG, dl, VT, V2, V1);
}
if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize the mask so all entries that point to V2 point to its first
// element, then try to match unpck{h|l} again. If a match is found, return
// a new vector_shuffle with the corrected mask.
SmallVector<int, 8> NewMask(M.begin(), M.end());
NormalizeMask(NewMask, NumElems);
if (isUNPCKLMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
if (Commuted) {
// Commute it back and try unpck* again.
// FIXME: this seems wrong.
CommuteVectorShuffleMask(M, NumElems);
std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
Commuted = false;
if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore.
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
if (isPSHUFLWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
// supported in the AVX instruction set.
//
// Handle VMOVDDUPY permutations
if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
if (isVPERMILPMask(M, VT)) {
if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
}
// Handle VPERM2F128/VPERM2I128 permutations
if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
if (BlendOp.getNode())
return BlendOp;
unsigned Imm8;
if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
VT.is512BitVector()) {
MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems);
SmallVector<SDValue, 16> permclMask;
for (unsigned i = 0; i != NumElems; ++i) {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
}
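// Undef mask elements (M[i] < 0) are mapped to source element 0 above; any
// choice is valid there, since the corresponding result lane is undef.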
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT,
&permclMask[0], NumElems);
if (V2IsUndef)
// The bitcast is for VPERMPS, since the mask is v8i32 but the node takes v8f32.
return DAG.getNode(X86ISD::VPERMV, dl, VT,
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
return DAG.getNode(X86ISD::VPERMV3, dl, VT,
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2);
}
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
// this is the plan.
//
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v16i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v32i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
if (NumElems == 4 && VT.is128BitVector())
return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
// Handle general 256-bit shuffles
if (VT.is256BitVector())
return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
if (!Op.getOperand(0).getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (VT.getSizeInBits() == 16) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1)));
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register, which will require a movd to copy
// the result back to an FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32. And in
// the case of a store, it's not worth it if the index is a constant 0,
// because a MOVSSmr can be used instead, which is smaller and faster.
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE ||
(isa<ConstantSDNode>(Op.getOperand(1)) &&
cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
}
if (VT == MVT::i32 || VT == MVT::i64) {
// extractps/pextrq work with a constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
return SDValue();
}
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
if (!isa<ConstantSDNode>(Idx)) {
if (VecVT.is512BitVector() ||
(VecVT.is256BitVector() && Subtarget->hasInt256() &&
VecVT.getVectorElementType().getSizeInBits() == 32)) {
MVT MaskEltVT =
MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
MaskEltVT.getSizeInBits());
Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
getZeroVector(MaskVT, Subtarget, DAG, dl),
Idx, DAG.getConstant(0, getPointerTy()));
SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
Perm, DAG.getConstant(0, getPointerTy()));
}
return SDValue();
}
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
// Make IdxVal relative to the start of the extracted 128-bit chunk.
IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, MVT::i32));
}
assert(VecVT.is128BitVector() && "Unexpected vector length");
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
}
MVT VT = Op.getSimpleValueType();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDValue Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it matches pextrw, which produces a 32-bit result.
MVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
MVT VVT = Op.getOperand(0).getSimpleValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
}
if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note: if the lower 64 bits of the UNPCKHPD result are then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
MVT VVT = Op.getOperand(0).getSimpleValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
}
return SDValue();
}
static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if (!VT.is128BitVector())
return SDValue();
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
// v8i16 uses PINSRW; everything else that reaches here (v16i8) uses PINSRB.
unsigned Opc = (VT == MVT::v8i16) ? X86ISD::PINSRW : X86ISD::PINSRB;
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
}
if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
// the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
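// Illustrative example: an insert into lane 2 yields the immediate
// (2 << 4) == 0x20, i.e. source select 0, destination select 2, zero mask 0.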
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
// Create this as a scalar-to-vector.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
}
if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
// PINSR* works with constant index.
return Op;
}
return SDValue();
}
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
// If this is a 256-bit vector result, first extract the 128-bit vector,
// insert the element into the extracted half and then place it back.
if (VT.is256BitVector() || VT.is512BitVector()) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
// Get the desired 128-bit vector half.
unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired half.
unsigned NumEltsIn128 = 128/EltVT.getSizeInBits();
unsigned IdxIn128 = IdxVal - (IdxVal/NumEltsIn128) * NumEltsIn128;
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, MVT::i32));
// Insert the changed part back to the 256-bit vector
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
return SDValue();
if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits()/128;
MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
// Insert the 128-bit vector.
return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
if (OpVT == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
// a simple subregister reference or explicit instructions to grab
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT ResVT = Op.getSimpleValueType();
MVT InVT = In.getSimpleValueType();
if (Subtarget->hasFp256()) {
if (ResVT.is128BitVector() &&
(InVT.is256BitVector() || InVT.is512BitVector()) &&
isa<ConstantSDNode>(Idx)) {
return Extract128BitVector(In, IdxVal, DAG, dl);
}
if (ResVT.is256BitVector() && InVT.is512BitVector() &&
isa<ConstantSDNode>(Idx)) {
return Extract256BitVector(In, IdxVal, DAG, dl);
}
}
return SDValue();
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
SDLoc dl(Op.getNode());
SDValue Vec = Op.getNode()->getOperand(0);
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
if ((Op.getNode()->getSimpleValueType(0).is256BitVector() ||
Op.getNode()->getSimpleValueType(0).is512BitVector()) &&
SubVec.getNode()->getSimpleValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
}
if (Op.getNode()->getSimpleValueType(0).is512BitVector() &&
SubVec.getNode()->getSimpleValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
}
}
return SDValue();
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
WrapperKind = X86ISD::WrapperRIP;
else if (Subtarget->isPICStyleGOT())
OpFlag = X86II::MO_GOTOFF;
else if (Subtarget->isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
CP->getAlignment(),
CP->getOffset(), OpFlag);
SDLoc DL(CP);
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), getPointerTy()),
Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
WrapperKind = X86ISD::WrapperRIP;
else if (Subtarget->isPICStyleGOT())
OpFlag = X86II::MO_GOTOFF;
else if (Subtarget->isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
OpFlag);
SDLoc DL(JT);
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), getPointerTy()),
Result);
return Result;
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel)) {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
OpFlag = X86II::MO_GOTPCREL;
WrapperKind = X86ISD::WrapperRIP;
} else if (Subtarget->isPICStyleGOT()) {
OpFlag = X86II::MO_GOT;
} else if (Subtarget->isPICStyleStubPIC()) {
OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
} else if (Subtarget->isPICStyleStubNoDynamic()) {
OpFlag = X86II::MO_DARWIN_NONLAZY;
}
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
SDLoc DL(Op);
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), getPointerTy()),
Result);
}
// For symbols that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
Subtarget->ClassifyBlockAddressReference();
CodeModel::Model M = getTargetMachine().getCodeModel();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset,
OpFlags);
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
else
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
Result);
}
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
int64_t Offset, SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
CodeModel::Model M = getTargetMachine().getCodeModel();
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
Offset = 0;
} else {
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
else
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
DAG.getConstant(Offset, getPointerTy()));
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG);
}
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
} else {
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
}
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
MFI->setAdjustsStack(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
SDValue InFlag;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
const EVT PtrVT,
bool is64Bit) {
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
X86MachineFunctionInfo* MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
if (is64Bit) {
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
// of Base.
// Build x@dtpoff.
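// (x@dtpoff is the offset of x within its module's TLS block, which is then
// added to the block base computed above.)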
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
bool is64Bit, bool isPIC) {
SDLoc dl(GA);
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
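// (In X86 address-space numbering, 256 is %gs and 257 is %fs.)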
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
MachinePointerInfo(Ptr), false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP-relative, even on x86-64. One exception is
// initial exec.
unsigned WrapperKind = X86ISD::Wrapper;
if (model == TLSModel::LocalExec) {
OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
} else if (model == TLSModel::InitialExec) {
if (is64Bit) {
OperandFlags = X86II::MO_GOTTPOFF;
WrapperKind = X86ISD::WrapperRIP;
} else {
OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
}
} else {
llvm_unreachable("Unexpected model");
}
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
SDValue TGA =
DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
MachinePointerInfo::getGOT(), false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit(),
getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
if (Subtarget->isTargetDarwin()) {
// Darwin only has one model of TLS. Lower to that.
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
!Subtarget->is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
SDLoc DL(Op);
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), getPointerTy()),
Offset);
// Lowering the machine isd will make sure everything is in the right
// location.
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
// TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setAdjustsStack(true);
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
Chain.getValue(1));
}
if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture.
// We need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
// ; from TEB
// mov ecx, dword [rel _tls_index]; Load index (from C runtime)
// mov rcx, qword [rdx+rcx*8]
// mov eax, .tls$:tlsvar
// [rax+rcx] contains the address
// Windows 64bit: gs:0x58
// Windows 32bit: fs:__tls_array
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
SDLoc dl(GA);
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
// %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
// use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
(Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
DAG.getExternalSymbol("_tls_array", getPointerTy()));
SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
// Load the _tls_index variable
SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
if (Subtarget->is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
IDX, MachinePointerInfo(), MVT::i32,
false, false, 0);
else
IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
false, false, false, 0);
SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
getPointerTy());
IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
false, false, false, 0);
// Get the offset of the variable from the start of the .tls section.
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
}
/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
// X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
// generic ISD nodes don't. Insert an AND to be safe; it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, MVT::i8))
: DAG.getConstant(0, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
// If the shift amount is larger than or equal to the width of a part, we
// can't rely on the results of shld/shrd. Insert a test and select the
// appropriate values for large shift amounts.
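// Illustrative example, assuming an i64 shift split into i32 parts: for a
// left shift by 40, (40 & 32) is nonzero, so the selects below produce
// Hi = Lo << (40 & 31) == Lo << 8 and Lo = 0.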
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
} else {
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
}
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector())
return SDValue();
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; return the operand so the caller accepts it as
// Legal.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Op;
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
Subtarget->is64Bit()) {
return Op;
}
SDLoc dl(Op);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
SDLoc DL(Op);
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
unsigned ByteSize = SrcVT.getSizeInBits()/8;
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
MachineMemOperand *MMO;
if (FI) {
int SSFI = FI->getIndex();
MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
}
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
Tys, Ops, array_lengthof(Ops),
SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When the stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
MachineMemOperand *MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, array_lengthof(Ops),
Op.getValueType(), MMO);
Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
false, false, false, 0);
}
return Result;
}
// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
// This algorithm is not obvious. Here is what we're trying to output:
/*
movq %rax, %xmm0
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
haddpd %xmm0, %xmm0
#else
pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
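// Roughly, as a reading aid: punpckldq builds two doubles whose bit patterns
// are 2^52 + lo32 and 2^84 + hi32 * 2^32 (0x43300000 and 0x45300000 are the
// exponent words of 2^52 and 2^84); subtracting 2^52 and 2^84 leaves
// { lo32, hi32 * 2^32 }, and the final horizontal add yields
// lo32 + 2^32 * hi32, i.e. the original u64 value.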
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Op.getOperand(0));
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
CLod0);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (Subtarget->hasSSE3()) {
// FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
S2F, 0x4E, DAG);
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0));
}
// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
MVT::f64);
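// (0x4330000000000000 is the bit pattern of 2^52. OR-ing a zero-extended
// 32-bit value into the low mantissa bits of 2^52 produces exactly
// 2^52 + x, so subtracting the bias below recovers x as a double.)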
// Load the 32-bit value into an XMM register.
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
Op.getOperand(0));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
DAG.getIntPtrConstant(0));
// Or the load with the bias.
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Load)),
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
DAG.getIntPtrConstant(0));
// Subtract the bias.
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
EVT DestVT = Op.getValueType();
if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
DAG.getIntPtrConstant(0));
if (DestVT.bitsGT(MVT::f64))
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
// The destination is already f64; no adjustment needed.
return Sub;
}
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
EVT SVT = N0.getValueType();
SDLoc dl(Op);
assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
if (Op.getValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
EVT SrcVT = N0.getValueType();
EVT DstVT = Op.getValueType();
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue WordOff = DAG.getConstant(4, getPointerTy());
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
OffsetSlot, MachinePointerInfo(),
false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
MachineMemOperand *MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
array_lengthof(Ops), MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
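// (0x5F800000 is 2^64 as an IEEE-754 single. When the sign bit of the i64
// input is set, FILD interpreted the bits as value - 2^64, so 2^64 is added
// back via the fudge factor selected below.)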
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(dl,
getSetCCResultType(*DAG.getContext(), MVT::i64),
Op.getOperand(0), DAG.getConstant(0, MVT::i64),
ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF.zext(64)),
getPointerTy());
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0);
SDValue Four = DAG.getIntPtrConstant(4);
SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
Zero, Four);
FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
std::pair<SDValue,SDValue>
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// These are really Legal.
if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
if (Subtarget->is64Bit() &&
DstTy == MVT::i64 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
// We lower FP->int64 either into FISTP64 followed by a load from a temporary
// stack slot, or into the FTOL runtime function.
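// On 32-bit Windows targets the unsigned conversion is routed through a
// runtime helper (X86ISD::WIN_FTOL, the MSVC _ftol2-style routine) that
// returns the 64-bit result in EDX:EAX; the code at the bottom of this
// function reassembles the two halves with CopyFromReg and BUILD_PAIR.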
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
if (!IsSigned && isIntegerTypeFTOL(DstTy))
Opc = X86ISD::WIN_FTOL;
else
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();
// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops,
array_lengthof(Ops), DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
if (Opc != X86ISD::WIN_FTOL) {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, array_lengthof(Ops), DstTy,
MMO);
return std::make_pair(FIST, StackSlot);
} else {
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Value);
SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
MVT::i32, ftol.getValue(1));
SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
MVT::i32, eax.getValue(2));
SDValue Ops[] = { eax, edx };
SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops))
: DAG.getMergeValues(Ops, array_lengthof(Ops), DL);
return std::make_pair(pair, SDValue());
}
}
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
// Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
// Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
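// Worked example (sketch): for a zero extend with In = <a,b,c,d,e,f,g,h>
// (v8i16), vpunpcklwd(In, 0) yields <a,0,b,0,c,0,d,0>, which reinterpreted
// as v4i32 in little-endian lane order is <zext(a),zext(b),zext(c),zext(d)>;
// vpunpckhwd builds the upper half the same way. For ANY_EXTEND the upper
// bits are don't-care, so undef is interleaved instead of zeros (see
// NeedZero below).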
if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
SelectionDAG &DAG) {
MVT VT = Op->getValueType(0).getSimpleVT();
SDValue In = Op->getOperand(0);
MVT InVT = In.getValueType().getSimpleVT();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// At this point only the mask-extension case remains
assert(InVT.getVectorElementType() == MVT::i1);
SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
if (VT.is512BitVector())
return Brcst;
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
return Res;
}
return SDValue();
}
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
return Res;
}
assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements());
return SDValue();
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
if (VT.getVectorElementType().getSizeInBits() >=8)
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
unsigned NumElts = InVT.getVectorNumElements();
assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
if (InVT.getSizeInBits() < 512) {
MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
}
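// Sketch: truncation to i1 keeps only bit 0 of each element. The code below
// ANDs the input with a broadcast of the constant 1 and then uses TESTM,
// whose mask bit is set iff the ANDed element is nonzero, i.e. iff the
// original element's low bit was set.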
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
}
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0));
}
// On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS.
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2));
OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
// The PSHUFD mask:
static const int ShufMask1[] = {0, 2, 0, 0};
SDValue Undef = DAG.getUNDEF(VT);
OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1);
OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1);
// The MOVLHPS mask:
static const int ShufMask2[] = {0, 1, 4, 5};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget->hasInt256()) {
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
SmallVector<SDValue,32> pshufbMask;
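// Mask sketch: within each 128-bit lane, bytes {0,1, 4,5, 8,9, 12,13} pick
// the low word of each dword into the lane's low 8 bytes, while the 0x80
// entries zero the upper 8 bytes. The v4i64 shuffle {0,2,-1,-1} afterwards
// packs the two lanes' populated halves together so the v8i16 result can be
// extracted from the low 128 bits.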
for (unsigned i = 0; i < 2; ++i) {
pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
for (unsigned j = 0; j < 8; ++j)
pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
&pshufbMask[0], 32);
In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
static const int ShufMask[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
&ShufMask[0]);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::BITCAST, DL, VT, In);
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(4));
OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
// The PSHUFB mask:
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
SDValue Undef = DAG.getUNDEF(MVT::v16i8);
OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
// The MOVLHPS Mask:
static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
}
// Handle truncation of V256 to V128 using shuffles.
if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumElems * 2);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
SDValue V = DAG.getVectorShuffle(NVT, DL,
DAG.getNode(ISD::BITCAST, DL, NVT, In),
DAG.getUNDEF(NVT), &MaskVec[0]);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
DAG.getIntPtrConstant(0));
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
if (VT == MVT::v8i16)
return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT,
DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op),
MVT::v8i32, Op.getOperand(0)));
return SDValue();
}
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (FIST.getNode() == 0) return Op;
if (StackSlot.getNode())
// Load the result.
return DAG.getLoad(Op.getValueType(), SDLoc(Op),
FIST, StackSlot, MachinePointerInfo(),
false, false, false, 0);
// The node is the result.
return FIST;
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
SelectionDAG &DAG) const {
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
assert(FIST.getNode() && "Unexpected failure");
if (StackSlot.getNode())
// Load the result.
return DAG.getLoad(Op.getValueType(), SDLoc(Op),
FIST, StackSlot, MachinePointerInfo(),
false, false, false, 0);
// The node is the result.
return FIST;
}
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
In, DAG.getUNDEF(SVT)));
}
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
}
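// fabs reduces to a bit operation (sketch): clear the IEEE-754 sign bit,
// i.e. x & 0x7FFFFFFFFFFFFFFF for f64 or x & 0x7FFFFFFF for f32, which is
// exactly the splat constant built below.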
Constant *C;
if (EltVT == MVT::f64)
C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, ~(1ULL << 63))));
else
C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
if (VT.isVector()) {
MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::AND, dl, ANDVT,
DAG.getNode(ISD::BITCAST, dl, ANDVT,
Op.getOperand(0)),
DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
}
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
}
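// Likewise fneg is a pure bit flip (sketch): x ^ (1 << 63) for f64 or
// x ^ (1 << 31) for f32, hence the sign-bit splat built below and the
// XOR/FXOR that consumes it.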
Constant *C;
if (EltVT == MVT::f64)
C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 1ULL << 63)));
else
C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
if (VT.isVector()) {
MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
Op.getOperand(0)),
DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
}
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op1.getSimpleValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
SrcVT = VT;
}
// And if it is bigger, shrink it first.
if (SrcVT.bitsGT(VT)) {
Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
}
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of the second operand.
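// Overall identity (sketch): copysign(x, y) = (x & ~SignMask) | (y & SignMask).
// Mask1 below extracts the sign bit of Op1, Mask2 further down clears the
// sign bit of Op0, and the final FOR merges the two.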
SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
const fltSemantics &Sem = APFloat::IEEEdouble;
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
const fltSemantics &Sem = APFloat::IEEEsingle;
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
if (SrcVT.bitsGT(VT)) {
// Op0 is MVT::f32, Op1 is MVT::f64.
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
}
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
const fltSemantics &Sem = APFloat::IEEEdouble;
CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
APInt(64, ~(1ULL << 63)))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
const fltSemantics &Sem = APFloat::IEEEsingle;
CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
APInt(32, ~(1U << 31)))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
// Or the value with the sign bit.
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
DAG.getConstant(1, VT));
return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
}
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
return SDValue();
if (!Op->hasOneUse())
return SDValue();
SDNode *N = Op.getNode();
SDLoc DL(N);
SmallVector<SDValue, 8> Opnds;
DenseMap<SDValue, unsigned> VecInMap;
EVT VT = MVT::Other;
// Recognize a special case where a vector is cast into a wide integer to
// test for all zeros.
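// Pattern sketch: a whole-vector "is zero" test written as an OR tree of
// element extracts, e.g.
//   or (or (extractelt v, 0), (extractelt v, 1)),
//      (or (extractelt v, 2), (extractelt v, 3))
// Every element of each source vector must appear (tracked by the per-vector
// bit mask below); the tree then folds to PTEST(v, v), whose ZF answers
// "v == 0".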
Opnds.push_back(N->getOperand(0));
Opnds.push_back(N->getOperand(1));
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
// BFS traverse all OR'd operands.
if (I->getOpcode() == ISD::OR) {
Opnds.push_back(I->getOperand(0));
Opnds.push_back(I->getOperand(1));
// Re-evaluate the number of nodes to be traversed.
e += 2; // 2 more nodes (LHS and RHS) are pushed.
continue;
}
// Quit if this is not an EXTRACT_VECTOR_ELT.
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// Quit if the index is not a constant.
SDValue Idx = I->getOperand(1);
if (!isa<ConstantSDNode>(Idx))
return SDValue();
SDValue ExtractedFromVec = I->getOperand(0);
DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
if (M == VecInMap.end()) {
VT = ExtractedFromVec.getValueType();
// Quit if not 128/256-bit vector.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
// Quit if not the same type.
if (VecInMap.begin() != VecInMap.end() &&
VT != VecInMap.begin()->first.getValueType())
return SDValue();
M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
}
M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
}
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Not extracted from 128-/256-bit vector.");
unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
SmallVector<SDValue, 8> VecIns;
for (DenseMap<SDValue, unsigned>::const_iterator
I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
// Quit if not all elements are used.
if (I->second != FullMask)
return SDValue();
VecIns.push_back(I->first);
}
EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
// If more than one full vector is evaluated, OR them together first before the PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
// Each iteration will OR 2 nodes and append the result until there is only
// 1 node left, i.e. the final OR'd value of all vectors.
SDValue LHS = VecIns[Slot];
SDValue RHS = VecIns[Slot + 1];
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
VecIns.back(), VecIns.back());
}
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO:
NeedOF = true;
break;
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
if (Op.getResNo() != 0 || NeedOF || NeedCF)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
unsigned Opcode = 0;
unsigned NumOperands = 0;
// Truncate operations may prevent the merge of the SETCC instruction
// and the arithmetic instruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
SDValue Arith = Op->getOperand(0);
// Both the trunc and the arithmetic op need to have one user each.
if (Arith->hasOneUse())
switch (Arith.getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
NeedTruncation = true;
ArithOp = Arith;
}
}
}
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation,
// which may be the result of a CAST. We use the variable 'Op', the non-cast
// value, when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
// in a match is a store, isel doesn't know how to remap non-chain non-flag
// uses of other nodes in the match, such as the ADD in this case. This
// leads to the ADD being left around and reselected, with the result being
// two adds in the output. Alas, even if none of our users are stores, that
// doesn't prove we're O.K. Ergo, if we have any parents that aren't
// CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
// climbing the DAG back to the root, and it doesn't seem to be worth the
// effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
UI->getOpcode() != ISD::SETCC &&
UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
if (C->getAPIntValue() == 1) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
if (C->getAPIntValue().isAllOnesValue()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
}
}
// Otherwise use a regular EFLAGS-setting add.
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
case ISD::AND: {
// If the primary result of the 'and' isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better.
bool NonFlagUse = false;
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
unsigned UOpNo = UI.getOperandNo();
if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
// Look past the truncate.
UOpNo = User->use_begin().getOperandNo();
User = *User->use_begin();
}
if (User->getOpcode() != ISD::BRCOND &&
User->getOpcode() != ISD::SETCC &&
!(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
NonFlagUse = true;
break;
}
}
if (!NonFlagUse)
break;
}
// FALL THROUGH
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
// Due to the ISEL shortcoming noted above, be conservative if this op is
// likely to be selected as part of a load-modify-store instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() == ISD::STORE)
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG);
if (EFLAGS.getNode())
return EFLAGS;
}
Opcode = X86ISD::OR;
break;
}
}
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
return SDValue(Op.getNode(), 1);
default:
default_case:
break;
}
// If we found that truncation is beneficial, perform the truncation and
// update 'Op'.
if (NeedTruncation) {
EVT VT = Op.getValueType();
SDValue WideVal = Op->getOperand(0);
EVT WideVT = WideVal.getValueType();
unsigned ConvertedOp = 0;
// Use a target machine opcode to prevent further DAGCombine
// optimizations that may separate the arithmetic operations
// from the setcc node.
switch (WideVal.getOpcode()) {
default: break;
case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
case ISD::AND: ConvertedOp = X86ISD::AND; break;
case ISD::OR: ConvertedOp = X86ISD::OR; break;
case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
}
if (ConvertedOp) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
}
}
}
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
Ops.push_back(Op.getOperand(i));
SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
DAG.ReplaceAllUsesWith(Op, New);
return SDValue(New.getNode(), 1);
}
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SelectionDAG &DAG) const {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
if (C->getAPIntValue() == 0)
return EmitTest(Op0, X86CC, DAG);
SDLoc dl(Op0);
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
Op0, Op1);
return SDValue(Sub.getNode(), 1);
}
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
/// Convert a comparison if required by the subtarget.
SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SelectionDAG &DAG) const {
// If the subtarget does not support the FUCOMI instruction, floating-point
// comparisons have to be converted.
if (Subtarget->hasCMov() ||
Cmp.getOpcode() != X86ISD::CMP ||
!Cmp.getOperand(0).getValueType().isFloatingPoint() ||
!Cmp.getOperand(1).getValueType().isFloatingPoint())
return Cmp;
// The instruction selector will select an FUCOM instruction instead of
// FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
// build an SDNode sequence that transfers the result from FPSW into EFLAGS:
// (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
SDLoc dl(Cmp);
SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
static bool isAllOnes(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isAllOnesValue();
}
/// LowerToBT - The result of an 'and' is compared against zero. Turn it into a
/// BT node if possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
SDLoc dl, SelectionDAG &DAG) const {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
SDValue LHS, RHS;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
if (And00C->getZExtValue() == 1) {
// If we looked past a truncate, check that it's only truncating away
// known zeros.
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
APInt Zeros, Ones;
DAG.ComputeMaskedBits(Op0, Zeros, Ones);
if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
return SDValue();
}
LHS = Op1;
RHS = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
// Use BT if the immediate can't be encoded in a TEST instruction.
if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
LHS = AndLHS;
RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
}
}
if (LHS.getNode()) {
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reason.
if (LHS.getValueType() == MVT::i8 ||
LHS.getValueType() == MVT::i16)
LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
// If the operand types disagree, extend the shift amount to match. Since
// BT ignores high bits (like shifts) we can use anyextend.
if (LHS.getValueType() != RHS.getValueType())
RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, MVT::i8), BT);
}
return SDValue();
}
/// \brief - Turns an ISD::CondCode into a value suitable for SSE floating point
/// mask CMPs.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
SDValue &Op1) {
unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
// 0 - EQ
// 1 - LT
// 2 - LE
// 3 - UNORD
// 4 - NEQ
// 5 - NLT
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; // Fallthrough
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; // Fallthrough
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ:
case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
return SSECC;
}
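// Usage sketch: SSECC == 8 is a sentinel rather than a real immediate.
// SETUEQ and SETONE have no single-predicate encoding, so callers (see
// LowerVSETCC) expand them into two compares that are then combined:
//   ueq(a,b) = unord(a,b) | eq(a,b)   (CC0 = 3, CC1 = 0, OR)
//   one(a,b) = ord(a,b) & neq(a,b)    (CC0 = 7, CC1 = 4, AND)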
// Lower256IntVSETCC - Break a 256-bit integer VSETCC into two new 128-bit
// ones, and then concatenate the results back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
SDValue CC = Op.getOperand(2);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
Op.getValueType().getScalarType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
bool Unsigned = false;
unsigned SSECC;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: SSECC = 4; break;
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETUGT: Unsigned = true;
case ISD::SETGT: SSECC = 6; break; // NLE
case ISD::SETULT: Unsigned = true;
case ISD::SETLT: SSECC = 1; break;
case ISD::SETUGE: Unsigned = true;
case ISD::SETGE: SSECC = 5; break; // NLT
case ISD::SETULE: Unsigned = true;
case ISD::SETLE: SSECC = 2; break;
}
unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, MVT::i8));
}
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
unsigned Opc = X86ISD::CMPP;
if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16);
Opc = X86ISD::CMPM;
}
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
unsigned CC0, CC1;
unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
} else {
assert(SetCCOpcode == ISD::SETONE);
CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, MVT::i8));
return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, MVT::i8));
}
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
EVT OpVT = Op1.getValueType();
if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG);
// In the AVX-512 architecture, setcc returns a mask with i1 elements,
// but there is no compare instruction for i8 and i16 elements.
// We are not talking about 512-bit operands in this case; such
// types are illegal.
if (MaskResult &&
(OpVT.getVectorElementType().getSizeInBits() < 32 &&
OpVT.getVectorElementType().getSizeInBits() >= 8))
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
Invert = true; break;
case ISD::SETULT: Swap = true;
case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
FlipSigns = true; Invert = true; break;
}
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
bool hasMinMax =
(Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
|| (Subtarget->hasSSE2() && (VET == MVT::i8));
if (hasMinMax) {
switch (SetCCOpcode) {
default: break;
case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
}
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
}
if (Swap)
std::swap(Op0, Op1);
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
assert(Subtarget->hasSSE2() && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
SB = DAG.getConstant(0x80000000U, MVT::v4i32);
} else {
SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32);
SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32);
SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Sign, Zero, Sign, Zero);
}
Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
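// Why the XORs work (sketch): a 64-bit compare decomposes into a signed
// compare of the high dwords plus an unsigned compare of the low dwords.
// PCMPGTD is signed, but (a ^ 0x80000000) >s (b ^ 0x80000000) equals
// a >u b, so SB always flips the low dwords (elements 0 and 2), and flips
// the high dwords as well only when the 64-bit compare itself is unsigned.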
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64 bit integers.
static const int MaskHi[] = { 1, 1, 3, 3 };
static const int MaskLo[] = { 0, 0, 2, 2 };
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available, synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
static const int Mask[] = { 1, 0, 3, 2 };
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
EVT EltVT = VT.getVectorElementType();
SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
if (MinMax)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
return Result;
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
if (NewSetCC.getNode())
return NewSetCC;
}
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if (Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^
cast<ConstantSDNode>(Op1)->isNullValue();
if (!Invert) return Op0;
CCode = X86::GetOppositeBranchCondition(CCode);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
}
}
bool isFP = Op1.getSimpleValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
Opc == X86ISD::SAHF)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD ||
Opc == X86ISD::SUB ||
Opc == X86ISD::ADC ||
Opc == X86ISD::SBB ||
Opc == X86ISD::SMUL ||
Opc == X86ISD::UMUL ||
Opc == X86ISD::INC ||
Opc == X86ISD::DEC ||
Opc == X86ISD::OR ||
Opc == X86ISD::XOR ||
Opc == X86ISD::AND))
return true;
if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
return true;
return false;
}
static bool isZero(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isNullValue();
}
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
}
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
EVT VT = Op1.getValueType();
SDValue CC;
// Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available. Otherwise fp cmovs get lowered into a less efficient branch
// sequence later on.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (SSECC != 8) {
unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd;
SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
if (Cond.getOpcode() == ISD::SETCC) {
SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode())
Cond = NewCond;
}
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
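// Worked sketch of the first form: comparing x against 1 computes x - 1,
// which borrows (CF = 1) exactly when x == 0; SETCC_CARRY (an SBB of a
// register with itself) then materializes all-ones or all-zeros from CF,
// and ORing that mask with y selects -1 or y without a branch.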
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isZero(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
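// NEG sets CF iff its operand is nonzero, so NEG x; SBB r,r yields -1 when
// x != 0 and 0 when x == 0 with no explicit compare, which is what the
// X86ISD::SUB-from-zero plus SETCC_CARRY below emits.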
if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
if (YC->isNullValue() &&
(isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
DAG.getConstant(0, CmpOp0.getValueType()),
CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8),
SDValue(Neg.getNode(), 1));
return Res;
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
if (isAllOnes(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
if (N2C == 0 || !N2C->isNullValue())
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
}
}
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// If the condition flag is set by an X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getSimpleValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
Opc == X86ISD::BT) { // FIXME
Cond = Cmp;
addTest = false;
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, MVT::i8);
addTest = false;
}
if (addTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
}
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8), Cond);
if (isAllOnes(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
}
// X86 doesn't have an i8 cmov. If both operands are the result of a truncate
// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.
if (Op.getValueType() == MVT::i8 &&
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
// Blacklist CopyFromReg to avoid partial register stalls.
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
Constant *C = ConstantInt::get(*DAG.getContext(),
APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
if (VT.is512BitVector())
return Brcst;
return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
}
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return LowerSIGN_EXTEND_AVX512(Op, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
(VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
// Optimize vectors in AVX mode:
// sign extend v8i16 to v8i32 and v4i32 to v4i64.
//
// Divide the input vector into two parts;
// for v4i32 the shuffle masks will be { 0, 1, -1, -1 } and { 2, 3, -1, -1 },
// use the vpmovsx instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32,
// then concat the vectors back to the original VT.
unsigned NumElems = InVT.getVectorNumElements();
SDValue Undef = DAG.getUNDEF(InVT);
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
// isAndOrOfSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes, each of which has no other use apart
// from the AND / OR.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
Opc = Op.getOpcode();
if (Opc != ISD::OR && Opc != ISD::AND)
return false;
return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse() &&
Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
Op.getOperand(1).hasOneUse());
}
// isXor1OfSetCC - Return true if the node is an ISD::XOR of an
// X86ISD::SETCC and the constant 1, and the SETCC node has a single use.
static bool isXor1OfSetCC(SDValue Op) {
if (Op.getOpcode() != ISD::XOR)
return false;
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (N1C && N1C->getAPIntValue() == 1) {
return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse();
}
return false;
}
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
SDValue CC;
bool Inverted = false;
if (Cond.getOpcode() == ISD::SETCC) {
// Check for setcc([su]{add,sub,mul}o == 0).
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
isa<ConstantSDNode>(Cond.getOperand(1)) &&
cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
Cond.getOperand(0).getResNo() == 1 &&
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
Cond.getOperand(0).getOpcode() == ISD::USUBO ||
Cond.getOperand(0).getOpcode() == ISD::SMULO ||
Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
Inverted = true;
Cond = Cond.getOperand(0);
} else {
SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode())
Cond = NewCond;
}
}
#if 0
// FIXME: LowerXALUO doesn't handle these!!
else if (Cond.getOpcode() == X86ISD::ADD ||
Cond.getOpcode() == X86ISD::SUB ||
Cond.getOpcode() == X86ISD::SMUL ||
Cond.getOpcode() == X86ISD::UMUL)
Cond = LowerXALUO(Cond, DAG);
#endif
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// If the condition flag is set by an X86ISD::CMP, then use it as the
// condition-setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
// FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
Cond = Cmp;
addTest = false;
} else {
switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
default: break;
case X86::COND_O:
case X86::COND_B:
// These can only come from an arithmetic instruction with overflow,
// e.g. SADDO, UADDO.
Cond = Cond.getNode()->getOperand(1);
addTest = false;
break;
}
}
}
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
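// Map the generic overflow opcode to the flag-producing X86 node and to
// the flag it reports overflow in: unsigned overflow sets CF (COND_B),
// signed overflow sets OF (COND_O).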
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (Inverted)
X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, MVT::i8);
addTest = false;
} else {
unsigned CondOpc;
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
SDValue Cmp = Cond.getOperand(0).getOperand(1);
if (CondOpc == ISD::OR) {
// Also, recognize the pattern generated by an FCMP_UNE. We can emit
// two branches instead of an explicit OR instruction with a
// separate test.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp)) {
CC = Cond.getOperand(0).getOperand(0);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = Cond.getOperand(1).getOperand(0);
Cond = Cmp;
addTest = false;
}
} else { // ISD::AND
// Also, recognize the pattern generated by an FCMP_OEQ. We can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp) &&
Op.getNode()->hasOneUse()) {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need it because the successors must be reversed in order to
// implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
}
} else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
// Recognize 'xorb (setcc), 1' patterns. The xor inverts the condition.
// This should be transformed by the DAG combiner, except when the
// condition is set by an arithmetic-with-overflow node.
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
// For FCMP_OEQ, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
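// The net effect for 'br (setoeq a, b), T, F' is roughly:
//   ucomis a, b ; jne F ; jp F ; jmp T
// i.e. both the NE branch and the parity branch target the false block.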
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need it because the successors must be reversed in order to
// implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_P, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// For FCMP_UNE, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need it because the successors must be reversed in order to
// implement FCMP_UNE.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_NP, MVT::i8);
Cond = Cmp;
addTest = false;
Dest = FalseBB;
}
}
}
}
if (addTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
}
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
// Lower dynamic stack allocation to an _alloca call for Cygwin/MinGW
// targets. Calls to _alloca are needed to probe the stack when allocating
// more than 4k bytes in one go. Touching the stack at 4K increments is
// necessary to ensure that the guard pages used by the OS virtual memory
// manager are allocated in the correct sequence.
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
SDLoc dl(Op);
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
// The 64-bit implementation of segmented stacks needs to clobber both r10
// and r11. This makes it impossible to use along with nested parameters.
const Function *F = MF.getFunction();
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
if (I->hasNestAttr())
report_fatal_error("Cannot use segmented stacks with functions that "
"have nested arguments.");
}
const TargetRegisterClass *AddrRegClass =
getRegClassFor(Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
SDValue Ops1[2] = { Value, Chain };
return DAG.getMergeValues(Ops1, 2, dl);
} else {
SDValue Flag;
unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
if (Align) {
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
SDValue Ops1[2] = { SP, Chain };
return DAG.getMergeValues(Ops1, 2, dl);
}
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
// __va_list_tag:
//   gp_offset         (0 - 6 * 8)
//   fp_offset         (48 - 48 + 8 * 16)
//   overflow_arg_area (points to parameters coming in memory)
//   reg_save_area
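// In C terms this is the System V AMD64 va_list element type:
//   struct {
//     unsigned int gp_offset;   // offset 0, stored below
//     unsigned int fp_offset;   // offset 4
//     void *overflow_arg_area;  // offset 8
//     void *reg_save_area;      // offset 16
//   };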
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
SDValue Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
MVT::i32),
FIN, MachinePointerInfo(SV), false, false, 0);
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
FIN, MachinePointerInfo(SV, 4), false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
MachinePointerInfo(SV, 8),
false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOps[0], MemOps.size());
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
assert((Subtarget->isTargetLinux() ||
Subtarget->isTargetDarwin()) &&
"Unhandled target in LowerVAARG");
assert(Op.getNode()->getNumOperands() == 4);
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
unsigned Align = Op.getConstantOperandVal(3);
SDLoc dl(Op);
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
// TODO: Implement the AMD64 ABI in its entirety. This simple
// selection mechanism works only for the basic types.
if (ArgVT == MVT::f80) {
llvm_unreachable("va_arg for f80 not yet implemented");
} else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
} else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
} else {
llvm_unreachable("Unhandled argument type in LowerVAARG");
}
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->getAttributes()
.hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
// Insert VAARG_64 node into the DAG
// VAARG_64 returns two values: Variable Argument Address, Chain
SmallVector<SDValue, 11> InstOps;
InstOps.push_back(Chain);
InstOps.push_back(SrcPtr);
InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
InstOps.push_back(DAG.getConstant(Align, MVT::i32));
SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
VTs, &InstOps[0], InstOps.size(),
MVT::i64,
MachinePointerInfo(SV),
/*Align=*/0,
/*Volatile=*/false,
/*ReadMem=*/true,
/*WriteMem=*/true);
Chain = VAARG.getValue(1);
// Load the next argument and return it
return DAG.getLoad(ArgVT, dl,
Chain,
VAARG,
MachinePointerInfo(),
false, false, false, 0);
}
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
// getTargetVShiftByConstNode - Handle vector element shifts where the shift
// amount is a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue SrcOp, uint64_t ShiftAmt,
SelectionDAG &DAG) {
// Check for ShiftAmt >= element width
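// A logical shift by at least the element width yields zero, while an
// arithmetic shift saturates, replicating the sign bit; hence VSRAI is
// clamped to width - 1 and the other opcodes fold to a zero constant.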
if (ShiftAmt >= VT.getVectorElementType().getSizeInBits()) {
if (Opc == X86ISD::VSRAI)
ShiftAmt = VT.getVectorElementType().getSizeInBits() - 1;
else
return DAG.getConstant(0, VT);
}
assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
&& "Unknown target vector shift-by-constant node");
return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
}
// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue SrcOp, SDValue ShAmt,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
// Catch shift-by-constant.
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
default: llvm_unreachable("Unknown target vector shift node");
case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
// Need to build a vector containing shift amount
// The shift amount is 32 bits, but the SSE shift instructions read a
// 64-bit amount, so zero the adjacent high 32 bits.
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
// Comparison intrinsics.
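// Note on the ISA: COMISS/COMISD perform an ordered compare and raise
// #IA on any NaN operand, while UCOMISS/UCOMISD are unordered and raise
// it only for signaling NaNs; both set ZF/PF/CF from the comparison.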
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse_comile_ss:
case Intrinsic::x86_sse_comigt_ss:
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse_comineq_ss:
case Intrinsic::x86_sse_ucomieq_ss:
case Intrinsic::x86_sse_ucomilt_ss:
case Intrinsic::x86_sse_ucomile_ss:
case Intrinsic::x86_sse_ucomigt_ss:
case Intrinsic::x86_sse_ucomige_ss:
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_comieq_sd:
case Intrinsic::x86_sse2_comilt_sd:
case Intrinsic::x86_sse2_comile_sd:
case Intrinsic::x86_sse2_comigt_sd:
case Intrinsic::x86_sse2_comige_sd:
case Intrinsic::x86_sse2_comineq_sd:
case Intrinsic::x86_sse2_ucomieq_sd:
case Intrinsic::x86_sse2_ucomilt_sd:
case Intrinsic::x86_sse2_ucomile_sd:
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
unsigned Opc;
ISD::CondCode CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse2_comieq_sd:
Opc = X86ISD::COMI;
CC = ISD::SETEQ;
break;
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse2_comilt_sd:
Opc = X86ISD::COMI;
CC = ISD::SETLT;
break;
case Intrinsic::x86_sse_comile_ss:
case Intrinsic::x86_sse2_comile_sd:
Opc = X86ISD::COMI;
CC = ISD::SETLE;
break;
case Intrinsic::x86_sse_comigt_ss:
case Intrinsic::x86_sse2_comigt_sd:
Opc = X86ISD::COMI;
CC = ISD::SETGT;
break;
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse2_comige_sd:
Opc = X86ISD::COMI;
CC = ISD::SETGE;
break;
case Intrinsic::x86_sse_comineq_ss:
case Intrinsic::x86_sse2_comineq_sd:
Opc = X86ISD::COMI;
CC = ISD::SETNE;
break;
case Intrinsic::x86_sse_ucomieq_ss:
case Intrinsic::x86_sse2_ucomieq_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETEQ;
break;
case Intrinsic::x86_sse_ucomilt_ss:
case Intrinsic::x86_sse2_ucomilt_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETLT;
break;
case Intrinsic::x86_sse_ucomile_ss:
case Intrinsic::x86_sse2_ucomile_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETLE;
break;
case Intrinsic::x86_sse_ucomigt_ss:
case Intrinsic::x86_sse2_ucomigt_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETGT;
break;
case Intrinsic::x86_sse_ucomige_ss:
case Intrinsic::x86_sse2_ucomige_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETGE;
break;
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_ucomineq_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETNE;
break;
}
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// SSE2/AVX2 sub with unsigned saturation intrinsics
case Intrinsic::x86_sse2_psubus_b:
case Intrinsic::x86_sse2_psubus_w:
case Intrinsic::x86_avx2_psubus_b:
case Intrinsic::x86_avx2_psubus_w:
return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
case Intrinsic::x86_avx2_phsub_d: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
Opcode = X86ISD::FHADD;
break;
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
Opcode = X86ISD::FHSUB;
break;
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
Opcode = X86ISD::HADD;
break;
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
case Intrinsic::x86_avx2_phsub_d:
Opcode = X86ISD::HSUB;
break;
}
return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
// SSE2/SSE41/AVX2 integer max/min intrinsics.
case Intrinsic::x86_sse2_pmaxu_b:
case Intrinsic::x86_sse41_pmaxuw:
case Intrinsic::x86_sse41_pmaxud:
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
case Intrinsic::x86_avx512_pminu_d:
case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
case Intrinsic::x86_avx512_pmins_d:
case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse2_pmaxu_b:
case Intrinsic::x86_sse41_pmaxuw:
case Intrinsic::x86_sse41_pmaxud:
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
case Intrinsic::x86_avx512_pminu_d:
case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
case Intrinsic::x86_avx512_pmins_d:
case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
// SSE/SSE2/AVX floating point max/min intrinsics.
case Intrinsic::x86_sse_max_ps:
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
case Intrinsic::x86_avx512_max_ps_512:
case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
case Intrinsic::x86_avx512_min_ps_512:
case Intrinsic::x86_avx512_min_pd_512: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_max_ps:
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
case Intrinsic::x86_avx512_max_ps_512:
case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
case Intrinsic::x86_avx512_min_ps_512:
case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
// AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
Opcode = ISD::SHL;
break;
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
Opcode = ISD::SRL;
break;
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
Opcode = ISD::SRA;
break;
}
return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
case Intrinsic::x86_avx2_psign_b:
case Intrinsic::x86_avx2_psign_w:
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse41_insertps:
return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
// but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
case Intrinsic::x86_sse_sqrt_ps:
case Intrinsic::x86_sse2_sqrt_pd:
case Intrinsic::x86_avx_sqrt_ps_256:
case Intrinsic::x86_avx_sqrt_pd_256:
return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
// ptest and testp intrinsics. The intrinsics these come from are designed
// to return an integer value, not just set flags, so lower them to the
// ptest or testp pattern and a setcc for the result.
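// Note on the ISA: PTEST sets ZF = ((SRC1 & SRC2) == 0) and
// CF = ((~SRC1 & SRC2) == 0); VTESTPS/VTESTPD do the same using only the
// sign bit of each element. The condition codes chosen below simply read
// those flags (COND_E: ZF=1, COND_B: CF=1, COND_A: CF=0 and ZF=0).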
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestz_256:
case Intrinsic::x86_avx_ptestc_256:
case Intrinsic::x86_avx_ptestnzc_256:
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
}
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_kortestz:
case Intrinsic::x86_avx512_kortestc: {
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz) ? X86::COND_E : X86::COND_B;
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
// SSE/AVX shift intrinsics
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx2_psra_d: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
Opcode = X86ISD::VSHL;
break;
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
Opcode = X86ISD::VSRL;
break;
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx2_psra_d:
Opcode = X86ISD::VSRA;
break;
}
return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
// SSE/AVX immediate shift intrinsics
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx2_psrai_d: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
Opcode = X86ISD::VSHLI;
break;
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
Opcode = X86ISD::VSRLI;
break;
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
Opcode = X86ISD::VSRAI;
break;
}
return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
}
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
case Intrinsic::x86_sse42_pcmpestric128:
case Intrinsic::x86_sse42_pcmpistrio128:
case Intrinsic::x86_sse42_pcmpestrio128:
case Intrinsic::x86_sse42_pcmpistris128:
case Intrinsic::x86_sse42_pcmpestris128:
case Intrinsic::x86_sse42_pcmpistriz128:
case Intrinsic::x86_sse42_pcmpestriz128: {
unsigned Opcode;
unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpestria128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpistric128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpestric128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpistrio128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpestrio128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpistris128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpestris128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpistriz128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_sse42_pcmpestriz128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_E;
break;
}
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8),
SDValue(PCMP.getNode(), 1));
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
Opcode = X86ISD::PCMPISTRI;
else
Opcode = X86ISD::PCMPESTRI;
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
}
case Intrinsic::x86_fma_vfmadd_ps:
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmsub_ps:
case Intrinsic::x86_fma_vfmsub_pd:
case Intrinsic::x86_fma_vfnmadd_ps:
case Intrinsic::x86_fma_vfnmadd_pd:
case Intrinsic::x86_fma_vfnmsub_ps:
case Intrinsic::x86_fma_vfnmsub_pd:
case Intrinsic::x86_fma_vfmaddsub_ps:
case Intrinsic::x86_fma_vfmaddsub_pd:
case Intrinsic::x86_fma_vfmsubadd_ps:
case Intrinsic::x86_fma_vfmsubadd_pd:
case Intrinsic::x86_fma_vfmadd_ps_256:
case Intrinsic::x86_fma_vfmadd_pd_256:
case Intrinsic::x86_fma_vfmsub_ps_256:
case Intrinsic::x86_fma_vfmsub_pd_256:
case Intrinsic::x86_fma_vfnmadd_ps_256:
case Intrinsic::x86_fma_vfnmadd_pd_256:
case Intrinsic::x86_fma_vfnmsub_ps_256:
case Intrinsic::x86_fma_vfnmsub_pd_256:
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
case Intrinsic::x86_fma_vfmadd_ps_512:
case Intrinsic::x86_fma_vfmadd_pd_512:
case Intrinsic::x86_fma_vfmsub_ps_512:
case Intrinsic::x86_fma_vfmsub_pd_512:
case Intrinsic::x86_fma_vfnmadd_ps_512:
case Intrinsic::x86_fma_vfnmadd_pd_512:
case Intrinsic::x86_fma_vfnmsub_ps_512:
case Intrinsic::x86_fma_vfnmsub_pd_512:
case Intrinsic::x86_fma_vfmaddsub_ps_512:
case Intrinsic::x86_fma_vfmaddsub_pd_512:
case Intrinsic::x86_fma_vfmsubadd_ps_512:
case Intrinsic::x86_fma_vfmsubadd_pd_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_fma_vfmadd_ps:
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmadd_ps_256:
case Intrinsic::x86_fma_vfmadd_pd_256:
case Intrinsic::x86_fma_vfmadd_ps_512:
case Intrinsic::x86_fma_vfmadd_pd_512:
Opc = X86ISD::FMADD;
break;
case Intrinsic::x86_fma_vfmsub_ps:
case Intrinsic::x86_fma_vfmsub_pd:
case Intrinsic::x86_fma_vfmsub_ps_256:
case Intrinsic::x86_fma_vfmsub_pd_256:
case Intrinsic::x86_fma_vfmsub_ps_512:
case Intrinsic::x86_fma_vfmsub_pd_512:
Opc = X86ISD::FMSUB;
break;
case Intrinsic::x86_fma_vfnmadd_ps:
case Intrinsic::x86_fma_vfnmadd_pd:
case Intrinsic::x86_fma_vfnmadd_ps_256:
case Intrinsic::x86_fma_vfnmadd_pd_256:
case Intrinsic::x86_fma_vfnmadd_ps_512:
case Intrinsic::x86_fma_vfnmadd_pd_512:
Opc = X86ISD::FNMADD;
break;
case Intrinsic::x86_fma_vfnmsub_ps:
case Intrinsic::x86_fma_vfnmsub_pd:
case Intrinsic::x86_fma_vfnmsub_ps_256:
case Intrinsic::x86_fma_vfnmsub_pd_256:
case Intrinsic::x86_fma_vfnmsub_ps_512:
case Intrinsic::x86_fma_vfnmsub_pd_512:
Opc = X86ISD::FNMSUB;
break;
case Intrinsic::x86_fma_vfmaddsub_ps:
case Intrinsic::x86_fma_vfmaddsub_pd:
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmaddsub_ps_512:
case Intrinsic::x86_fma_vfmaddsub_pd_512:
Opc = X86ISD::FMADDSUB;
break;
case Intrinsic::x86_fma_vfmsubadd_ps:
case Intrinsic::x86_fma_vfmsubadd_pd:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_512:
case Intrinsic::x86_fma_vfmsubadd_pd_512:
Opc = X86ISD::FMSUBADD;
break;
}
return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
}
}
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
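// The machine node takes the standard X86 five-part memory reference
// (base, scale, index, displacement, segment) along with the source,
// mask and chain; displacement and segment are left at zero here.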
SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
}
static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
if (Src.getOpcode() == ISD::UNDEF)
Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
// RDRAND/RDSEED intrinsics.
case Intrinsic::x86_rdrand_16:
case Intrinsic::x86_rdrand_32:
case Intrinsic::x86_rdrand_64:
case Intrinsic::x86_rdseed_16:
case Intrinsic::x86_rdseed_32:
case Intrinsic::x86_rdseed_64: {
unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
IntNo == Intrinsic::x86_rdseed_32 ||
IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
X86ISD::RDRAND;
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from the RDRAND/RDSEED node, which is always
// 0 in that case, cast to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, MVT::i32),
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
Ops, array_lengthof(Ops));
// Return { result, isValid, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
// int_gather(index, base, scale);
case Intrinsic::x86_avx512_gather_qpd_512:
case Intrinsic::x86_avx512_gather_qps_512:
case Intrinsic::x86_avx512_gather_dpd_512:
case Intrinsic::x86_avx512_gather_qpi_512:
case Intrinsic::x86_avx512_gather_qpq_512:
case Intrinsic::x86_avx512_gather_dpq_512:
case Intrinsic::x86_avx512_gather_dps_512:
case Intrinsic::x86_avx512_gather_dpi_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Index = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Scale = Op.getOperand(4);
return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget);
}
// int_gather_mask(v1, mask, index, base, scale);
case Intrinsic::x86_avx512_gather_qps_mask_512:
case Intrinsic::x86_avx512_gather_qpd_mask_512:
case Intrinsic::x86_avx512_gather_dpd_mask_512:
case Intrinsic::x86_avx512_gather_dps_mask_512:
case Intrinsic::x86_avx512_gather_qpi_mask_512:
case Intrinsic::x86_avx512_gather_qpq_mask_512:
case Intrinsic::x86_avx512_gather_dpi_mask_512:
case Intrinsic::x86_avx512_gather_dpq_mask_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_avx512_gather_qps_mask_512:
Opc = X86::VGATHERQPSZrm; break;
case Intrinsic::x86_avx512_gather_qpd_mask_512:
Opc = X86::VGATHERQPDZrm; break;
case Intrinsic::x86_avx512_gather_dpd_mask_512:
Opc = X86::VGATHERDPDZrm; break;
case Intrinsic::x86_avx512_gather_dps_mask_512:
Opc = X86::VGATHERDPSZrm; break;
case Intrinsic::x86_avx512_gather_qpi_mask_512:
Opc = X86::VPGATHERQDZrm; break;
case Intrinsic::x86_avx512_gather_qpq_mask_512:
Opc = X86::VPGATHERQQZrm; break;
case Intrinsic::x86_avx512_gather_dpi_mask_512:
Opc = X86::VPGATHERDDZrm; break;
case Intrinsic::x86_avx512_gather_dpq_mask_512:
Opc = X86::VPGATHERDQZrm; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Base = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
Subtarget);
}
// int_scatter(base, index, v1, scale);
case Intrinsic::x86_avx512_scatter_qpd_512:
case Intrinsic::x86_avx512_scatter_qps_512:
case Intrinsic::x86_avx512_scatter_dpd_512:
case Intrinsic::x86_avx512_scatter_qpi_512:
case Intrinsic::x86_avx512_scatter_qpq_512:
case Intrinsic::x86_avx512_scatter_dpq_512:
case Intrinsic::x86_avx512_scatter_dps_512:
case Intrinsic::x86_avx512_scatter_dpi_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_avx512_scatter_qpd_512:
Opc = X86::VSCATTERQPDZmr; break;
case Intrinsic::x86_avx512_scatter_qps_512:
Opc = X86::VSCATTERQPSZmr; break;
case Intrinsic::x86_avx512_scatter_dpd_512:
Opc = X86::VSCATTERDPDZmr; break;
case Intrinsic::x86_avx512_scatter_dps_512:
Opc = X86::VSCATTERDPSZmr; break;
case Intrinsic::x86_avx512_scatter_qpi_512:
Opc = X86::VPSCATTERQDZmr; break;
case Intrinsic::x86_avx512_scatter_qpq_512:
Opc = X86::VPSCATTERQQZmr; break;
case Intrinsic::x86_avx512_scatter_dpq_512:
Opc = X86::VPSCATTERDQZmr; break;
case Intrinsic::x86_avx512_scatter_dpi_512:
Opc = X86::VPSCATTERDDZmr; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
SDValue Src = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain);
}
// int_scatter_mask(base, mask, index, v1, scale);
case Intrinsic::x86_avx512_scatter_qps_mask_512:
case Intrinsic::x86_avx512_scatter_qpd_mask_512:
case Intrinsic::x86_avx512_scatter_dpd_mask_512:
case Intrinsic::x86_avx512_scatter_dps_mask_512:
case Intrinsic::x86_avx512_scatter_qpi_mask_512:
case Intrinsic::x86_avx512_scatter_qpq_mask_512:
case Intrinsic::x86_avx512_scatter_dpi_mask_512:
case Intrinsic::x86_avx512_scatter_dpq_mask_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_avx512_scatter_qpd_mask_512:
Opc = X86::VSCATTERQPDZmr; break;
case Intrinsic::x86_avx512_scatter_qps_mask_512:
Opc = X86::VSCATTERQPSZmr; break;
case Intrinsic::x86_avx512_scatter_dpd_mask_512:
Opc = X86::VSCATTERDPDZmr; break;
case Intrinsic::x86_avx512_scatter_dps_mask_512:
Opc = X86::VSCATTERDPSZmr; break;
case Intrinsic::x86_avx512_scatter_qpi_mask_512:
Opc = X86::VPSCATTERQDZmr; break;
case Intrinsic::x86_avx512_scatter_qpq_mask_512:
Opc = X86::VPSCATTERQQZmr; break;
case Intrinsic::x86_avx512_scatter_dpq_mask_512:
Opc = X86::VPSCATTERDQZmr; break;
case Intrinsic::x86_avx512_scatter_dpi_mask_512:
Opc = X86::VPSCATTERDDZmr; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
}
// XTEST intrinsics.
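// Note on the ISA: XTEST clears ZF when executed inside an RTM/HLE
// transactional region and sets it otherwise, so COND_NE (ZF=0) below
// yields 1 exactly when a transaction is active.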
case Intrinsic::x86_xtest: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86::COND_NE, MVT::i8),
InTrans);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
}
}
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setReturnAddressIsTaken(true);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
EVT PtrVT = getPointerTy();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
false, false, false, 0);
return FrameAddr;
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc dl (Op);
EVT PtrVT = getPointerTy();
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
DAG.getIntPtrConstant(RegInfo->getSlotSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
DAG.getRegister(StoreAddrReg, PtrVT));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
// Large code-model.
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
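// For reference, the stores below assemble the following 23-byte trampoline
// (0x49 is the REX.WB prefix; r10 and r11 encode as 2 and 3, folded into the
// 0xB8 opcode byte and the ModRM byte):
//    0: 49 BB <FPtr:8>    movabsq <nested function>, r11
//   10: 49 BA <Nest:8>    movabsq <nest value>, r10
//   20: 49 FF E3          jmpq *r11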
// Load the pointer to the nested function into R11.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, MVT::i64));
OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
MachinePointerInfo(TrmpAddr, 2),
false, false, 2);
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 10),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, MVT::i64));
OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 12),
false, false, 2);
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 20),
false, false, 0);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::ECX;
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
const AttributeSet &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
if (Attrs.hasAttribute(Idx, Attribute::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
" parameters!");
}
}
break;
}
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;
break;
}
SDValue OutChains[4];
SDValue Addr, Disp;
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(10, MVT::i32));
Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, MVT::i32));
OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 1),
false, false, 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
MachinePointerInfo(TrmpAddr, 5),
false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, MVT::i32));
OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
}
}
SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
/*
The rounding mode is in bits 11:10 of the x87 FP control word (FPCW),
and has the following settings:
00 Round to nearest
01 Round to -inf
10 Round to +inf
11 Round to 0
FLT_ROUNDS, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf
To perform the conversion, we do:
(((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
*/
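// Checking the mapping on all four FPCW settings:
//   00 (nearest): ((0|0)+1)&3 == 1     10 (+inf): ((1|0)+1)&3 == 2
//   01 (-inf):    ((0|2)+1)&3 == 3     11 (zero): ((1|2)+1)&3 == 0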
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
EVT VT = Op.getValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
Ops, array_lengthof(Ops), MVT::i16,
MMO);
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
MachinePointerInfo(), false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x800, MVT::i16)),
DAG.getConstant(11, MVT::i8));
SDValue CWD2 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x400, MVT::i16)),
DAG.getConstant(9, MVT::i8));
SDValue RetVal =
DAG.getNode(ISD::AND, DL, MVT::i16,
DAG.getNode(ISD::ADD, DL, MVT::i16,
DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
// Issue a bsr (scan bits in reverse) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
// If src is zero (i.e. bsr sets ZF), returns NumBits.
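// The constant here is 2*NumBits-1 rather than NumBits so that the final
// XOR with NumBits-1 below maps the zero-input case to NumBits (e.g. for
// i32: 63 ^ 31 == 32).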
SDValue Ops[] = {
Op,
DAG.getConstant(NumBits+NumBits-1, OpVT),
DAG.getConstant(X86::COND_E, MVT::i8),
Op.getValue(1)
};
Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
// Finally xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
return Op;
}
static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
// Issue a bsr (scan bits in reverse).
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
// And xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
return Op;
}
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
Op = Op.getOperand(0);
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
DAG.getConstant(NumBits, VT),
DAG.getConstant(X86::COND_E, MVT::i8),
Op.getValue(1)
};
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops));
}
// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
// ones, and then concatenate the result back.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
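// With A = <a0,a1,a2,a3>, pmuludq(A, B) produces <a0*b0, a2*b2> as v2i64;
// shuffling the odd elements into the even lanes and multiplying again
// yields <a1*b1, a3*b3>. The final {0,4,2,6} shuffle then collects the low
// 32 bits of each of the four products.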
if (VT == MVT::v4i32) {
assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
"Should not custom lower when pmuldq is available!");
// Extract the odd parts.
static const int UnpackMask[] = { 1, -1, 3, -1 };
SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
// Multiply the even parts.
SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
// Now multiply odd parts.
SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
static const int ShufMask[] = { 0, 4, 2, 6 };
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
"Only know how to lower V2I64/V4I64/V8I64 multiply");
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
// AloBlo = pmuludq(a, b);
// AloBhi = pmuludq(a, Bhi);
// AhiBlo = pmuludq(Ahi, b);
// AloBhi = psllqi(AloBhi, 32);
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
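//
// This is schoolbook multiplication on 32-bit digits:
//   a*b == AloBlo + 2^32*(AloBhi + AhiBlo) + 2^64*(Ahi*Bhi),
// and the 2^64 term vanishes modulo 2^64, so Ahi*Bhi never needs to be
// computed.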
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
// Bit cast to 32-bit vectors for MULUDQ
EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
// Lower sdiv X, pow2-const.
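// This uses the usual branchless identity
//   X sdiv 2^Lg2 == (X + (X < 0 ? 2^Lg2 - 1 : 0)) ashr Lg2,
// where the conditional bias is formed as (X ashr (Bits-1)) lshr (Bits-Lg2).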
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
if (!C)
return SDValue();
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs) ||
EltTy.getSizeInBits() < SplatBitSize)
return SDValue();
if ((SplatValue != 0) &&
(SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
unsigned Lg2 = SplatValue.countTrailingZeros();
// Splat the sign bit.
SmallVector<SDValue, 16> Sz(NumElts,
DAG.getConstant(EltTy.getSizeInBits() - 1,
EltTy));
SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
NumElts));
// Add (N0 < 0) ? (2^Lg2 - 1) : 0 to round the quotient toward zero.
SmallVector<SDValue, 16> Amt(NumElts,
DAG.getConstant(EltTy.getSizeInBits() - Lg2,
EltTy));
SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
NumElts));
SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
NumElts));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (SplatValue.isNonNegative())
return SRA;
SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
}
return SDValue();
}
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
(Subtarget->hasAVX512() &&
(VT == MVT::v8i64 || VT == MVT::v16i32))) {
if (Op.getOpcode() == ISD::SHL)
return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
DAG);
if (Op.getOpcode() == ISD::SRL)
return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
DAG);
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
DAG);
}
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
MVT::v8i16, R, ShiftAmt,
DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
MVT::v8i16, R, ShiftAmt,
DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// R s>> a === ((R u>> a) ^ m) - m
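// with m = 128 u>> a (the sign bit shifted down). E.g. for a == 2 the byte
// 0x80 (-128) becomes 0x20 after the logical shift, XORs to 0x00, and the
// subtract yields 0xE0 == -32, as required.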
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
llvm_unreachable("Unknown shift opcode.");
}
if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
MVT::v16i16, R, ShiftAmt,
DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(32,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
MVT::v16i16, R, ShiftAmt,
DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(32,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
}
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// R s>> a === ((R u>> a) ^ m) - m
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
llvm_unreachable("Unknown shift opcode.");
}
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget->is64Bit() &&
(VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getValueType().getVectorNumElements() /
VT.getVectorNumElements();
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
uint64_t ShiftAmt = 0;
for (unsigned i = 0; i != Ratio; ++i) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
if (C == 0)
return SDValue();
// 6 == Log2(64)
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
}
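// For Ratio == 2 (a v2i64 shift amount built out of v4i32 pieces) this
// computes piece0 | (piece1 << 32), reassembling the 64-bit splat value
// from its two 32-bit halves.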
// Check remaining shift amounts.
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
uint64_t ShAmt = 0;
for (unsigned j = 0; j != Ratio; ++j) {
ConstantSDNode *C =
dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
if (C == 0)
return SDValue();
// 6 == Log2(64)
ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
}
if (ShAmt != ShiftAmt)
return SDValue();
}
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
DAG);
case ISD::SRL:
return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
DAG);
case ISD::SRA:
return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
DAG);
}
}
return SDValue();
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget* Subtarget) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
VT == MVT::v8i32 || VT == MVT::v16i16)) ||
(Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElts = VT.getVectorNumElements();
unsigned i, j;
for (i = 0; i != NumElts; ++i) {
if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
continue;
break;
}
for (j = i; j != NumElts; ++j) {
SDValue Arg = Amt.getOperand(j);
if (Arg.getOpcode() == ISD::UNDEF) continue;
if (Arg != Amt.getOperand(i))
break;
}
if (i != NumElts && j == NumElts)
BaseShAmt = Amt.getOperand(i);
} else {
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
Amt = Amt.getOperand(0);
if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElts = InVec.getValueType().getVectorNumElements();
unsigned i = 0;
for (; i != NumElts; ++i) {
SDValue Arg = InVec.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
BaseShAmt = Arg;
break;
}
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
unsigned SplatIdx =
cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
}
if (BaseShAmt.getNode() == 0)
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
DAG.getIntPtrConstant(0));
}
}
if (BaseShAmt.getNode()) {
if (EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
case MVT::v16i32:
case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRA:
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v4i32:
case MVT::v8i16:
case MVT::v8i32:
case MVT::v16i16:
case MVT::v16i32:
case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRL:
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
case MVT::v16i32:
case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
}
}
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget->is64Bit() &&
(VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
(Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getValueType().getVectorNumElements() /
VT.getVectorNumElements();
std::vector<SDValue> Vals(Ratio);
for (unsigned i = 0; i != Ratio; ++i)
Vals[i] = Amt.getOperand(i);
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
for (unsigned j = 0; j != Ratio; ++j)
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
case ISD::SRL:
return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
case ISD::SRA:
return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
}
}
return SDValue();
}
static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
SDValue V;
if (!Subtarget->hasSSE2())
return SDValue();
V = LowerScalarImmediateShift(Op, DAG, Subtarget);
if (V.getNode())
return V;
V = LowerScalarVariableShift(Op, DAG, Subtarget);
if (V.getNode())
return V;
if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
return Op;
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v4i64 || VT == MVT::v8i32))
return Op;
if (Op.getOpcode() == ISD::SHL &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v4i64 || VT == MVT::v8i32))
return Op;
if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
return Op;
}
// Lower SHL with variable shift amount.
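// The 0x3f800000 trick: (Amt << 23) lands in the exponent field of the
// float 1.0f, so after the bitcast each lane holds exactly 2^Amt; the
// FP_TO_SINT recovers 2^Amt as an integer and the multiply gives R << Amt.
// E.g. Amt == 3: exponent 127+3 -> 8.0f -> 8, and R*8 == R<<3.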
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
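// The ladder below handles per-byte shift amounts of 0-7: `a << 5` moves
// amount bit 2 into bit 7 of each byte; each round compares the byte's MSB
// against 0x80 to build a VSELECT mask, conditionally shifts R left by 4,
// then 2, then 1 (as R+R), and doubles `a` to expose the next amount bit.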
// a = a << 5;
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
SDValue VSelM = DAG.getConstant(0x80, VT);
SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
SDValue CM1 = DAG.getConstant(0x0f, VT);
SDValue CM2 = DAG.getConstant(0x3f, VT);
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// return VSELECT(r, r+r, a);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
DAG.getNode(ISD::ADD, dl, VT, R, R), R);
return R;
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Extract the two vectors
SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl);
// Recreate the shift amount vectors
SDValue Amt1, Amt2;
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
// Constant shift amount
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
for (unsigned i = 0; i != NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
&Amt1Csts[0], NumElems/2);
Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
&Amt2Csts[0], NumElems/2);
} else {
// Variable shift amount
Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl);
}
// Issue new vector shifts for the smaller types
V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
// Concatenate the result back
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
}
return SDValue();
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular
// instruction plus a "setcc" instruction that checks the overflow flag. The
// "brcond" lowering looks for this combo and may remove the "setcc"
// instruction if the "setcc" has only one use.
SDNode *N = Op.getNode();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
unsigned Cond = 0;
SDLoc DL(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
if (C->isOne()) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
case ISD::UADDO:
BaseOp = X86ISD::ADD;
Cond = X86::COND_B;
break;
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
if (C->isOne()) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
case ISD::USUBO:
BaseOp = X86ISD::SUB;
Cond = X86::COND_B;
break;
case ISD::SMULO:
BaseOp = X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
SDValue SetCC =
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(X86::COND_O, MVT::i32),
SDValue(Sum.getNode(), 2));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC =
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
DAG.getConstant(Cond, MVT::i32),
SDValue(Sum.getNode(), 1));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT VT = Op.getValueType();
if (!Subtarget->hasSSE2() || !VT.isVector())
return SDValue();
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v8i32:
case MVT::v16i16:
if (!Subtarget->hasFp256())
return SDValue();
if (!Subtarget->hasInt256()) {
// needs to be split
unsigned NumElems = VT.getVectorNumElements();
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
EVT ExtraEltVT = ExtraVT.getVectorElementType();
unsigned ExtraNumElems = ExtraVT.getVectorNumElements();
ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
ExtraNumElems/2);
SDValue Extra = DAG.getValueType(ExtraVT);
LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
}
// fall through
case MVT::v4i32:
case MVT::v8i16: {
SDValue Op0 = Op.getOperand(0);
SDValue Op00 = Op0.getOperand(0);
SDValue Tmp1;
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
if (Op0.getOpcode() == ISD::BITCAST &&
Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
// (sext (vzext x)) -> (vsext x)
Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
if (Tmp1.getNode()) {
EVT ExtraEltVT = ExtraVT.getVectorElementType();
// This folding is only valid when the in-reg type is a vector of i8,
// i16, or i32.
if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
ExtraEltVT == MVT::i32) {
SDValue Tmp1Op0 = Tmp1.getOperand(0);
assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
"This optimization is invalid without a VZEXT.");
return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
}
Op0 = Tmp1;
}
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
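// E.g. for sign-extending from i8 inside an i32 lane, BitsDiff == 24 and
// (x << 24) s>> 24 replicates bit 7 across the upper 24 bits.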
Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
DAG);
return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
DAG);
}
}
}
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
// no-sse2). There isn't any reason to disable it if the target processor
// supports it.
if (Subtarget->hasSSE2() || Subtarget->is64Bit())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, MVT::i8), // Scale
DAG.getRegister(0, MVT::i32), // Index
DAG.getTargetConstant(0, MVT::i32), // Disp
DAG.getRegister(0, MVT::i32), // Segment.
Zero,
Chain
};
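// This emits a locked OR of zero into the word at the top of the stack -
// a no-op read-modify-write that acts as a full barrier like MFENCE but
// is available on pre-SSE2 processors.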
SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT T = Op.getValueType();
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
case MVT::i64:
assert(Subtarget->is64Bit() && "Node not type legal!");
Reg = X86::RAX; size = 8;
break;
}
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
DAG.getTargetConstant(size, MVT::i8),
cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, array_lengthof(Ops), T, MMO);
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
return cpOut;
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Subtarget->is64Bit() && "Result not type legalized?");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = Op.getOperand(0);
SDLoc dl(Op);
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
rax.getValue(2));
SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
DAG.getConstant(32, MVT::i8));
SDValue Ops[] = {
DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
rdx.getValue(1)
};
return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
}
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
"Unexpected custom BITCAST");
// i64 <=> MMX conversions are Legal.
if (SrcVT==MVT::i64 && DstVT.isVector())
return Op;
if (DstVT==MVT::i64 && SrcVT.isVector())
return Op;
// MMX <=> MMX conversions are Legal.
if (SrcVT.isVector() && DstVT.isVector())
return Op;
// All other conversions need to be expanded.
return SDValue();
}
static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
EVT T = Node->getValueType(0);
SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
DAG.getConstant(0, T), Node->getOperand(2));
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), negOp,
cast<AtomicSDNode>(Node)->getSrcValue(),
cast<AtomicSDNode>(Node)->getAlignment(),
cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
// Convert seq_cst store -> xchg
// Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
// FIXME: On 32-bit, store -> fist or movq would be more efficient
// (The only way to get a 16-byte store is cmpxchg16b)
// FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent ||
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
cast<AtomicSDNode>(Node)->getMemOperand(),
cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
return Swap.getValue(1);
}
// Other atomic stores have a simple pattern.
return Op;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getNode()->getValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = X86ISD::ADD; break;
case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
case ISD::SUBC: Opc = X86ISD::SUB; break;
case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
}
if (!ExtraOp)
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1));
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1), Op.getOperand(2));
}
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values as { float, float } (in XMM0) or
// { double, double } (which is returned in XMM0, XMM1).
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
bool isF64 = ArgVT == MVT::f64;
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
Type *RetTy = isF64
? (Type*)StructType::get(ArgTy, ArgTy, NULL)
: (Type*)VectorType::get(ArgTy, 4);
TargetLowering::
CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
false, false, false, false, 0,
CallingConv::C, /*isTaillCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed*/true,
Callee, Args, DAG, dl);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
// Returned in xmm0 and xmm1.
return CallResult.first;
// Returned in bits 0:31 and 32:64 xmm0.
SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(0));
SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(1));
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
}
}
static void ReplaceATOMIC_LOAD(SDNode *Node,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
// Convert wide load -> cmpxchg8b/cmpxchg16b
// FIXME: On 32-bit, load -> fild or movq would be more efficient
// (The only way to get a 16-byte load is cmpxchg16b)
// FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
SDValue Zero = DAG.getConstant(0, VT);
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
Node->getOperand(0),
Node->getOperand(1), Zero, Zero,
cast<AtomicSDNode>(Node)->getMemOperand(),
cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
}
static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG, unsigned NewOp) {
SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Node->getOperand(2), DAG.getIntPtrConstant(0));
SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Node->getOperand(2), DAG.getIntPtrConstant(1));
SDValue Ops[] = { Chain, In1, In2L, In2H };
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64,
cast<MemSDNode>(Node)->getMemOperand());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
}
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE:
// We don't want to expand or promote these.
return;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
return;
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
if (StackSlot.getNode() != 0)
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
MachinePointerInfo(),
false, false, false, 0));
else
Results.push_back(FIST);
}
return;
}
case ISD::UINT_TO_FP: {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
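// Standard magic-number conversion: OR the zero-extended 32-bit values
// into the mantissa of 2^52 (bit pattern 0x4330000000000000), giving
// exactly 2^52 + x as a double; subtracting the bias leaves x, which is
// then narrowed to f32 by the VFPROUND below.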
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
N->getOperand(0));
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
MVT::f64);
SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
}
case ISD::FP_ROUND: {
if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
}
case ISD::READCYCLECOUNTER: {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
rd.getValue(1));
SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
eax.getValue(2));
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { eax, edx };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops,
array_lengthof(Ops)));
Results.push_back(edx.getValue(1));
return;
}
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
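// cmpxchg8b/cmpxchg16b register conventions: the expected value is passed
// in (E|R)DX:(E|R)AX, the replacement value in (E|R)CX:(E|R)BX, and the
// previous memory contents come back in (E|R)DX:(E|R)AX.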
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, HalfT));
cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(1, HalfT));
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
cpInL, SDValue());
cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
Regs64bit ? X86::RDX : X86::EDX,
cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(0, HalfT));
swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(1, HalfT));
swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX,
swapInL, cpInH.getValue(1));
swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
Regs64bit ? X86::RCX : X86::ECX,
swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
X86ISD::LCMPXCHG8_DAG;
SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
Ops, array_lengthof(Ops), T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
Results.push_back(cpOutH.getValue(1));
return;
}
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_SWAP: {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case ISD::ATOMIC_LOAD_ADD:
Opc = X86ISD::ATOMADD64_DAG;
break;
case ISD::ATOMIC_LOAD_AND:
Opc = X86ISD::ATOMAND64_DAG;
break;
case ISD::ATOMIC_LOAD_NAND:
Opc = X86ISD::ATOMNAND64_DAG;
break;
case ISD::ATOMIC_LOAD_OR:
Opc = X86ISD::ATOMOR64_DAG;
break;
case ISD::ATOMIC_LOAD_SUB:
Opc = X86ISD::ATOMSUB64_DAG;
break;
case ISD::ATOMIC_LOAD_XOR:
Opc = X86ISD::ATOMXOR64_DAG;
break;
case ISD::ATOMIC_LOAD_MAX:
Opc = X86ISD::ATOMMAX64_DAG;
break;
case ISD::ATOMIC_LOAD_MIN:
Opc = X86ISD::ATOMMIN64_DAG;
break;
case ISD::ATOMIC_LOAD_UMAX:
Opc = X86ISD::ATOMUMAX64_DAG;
break;
case ISD::ATOMIC_LOAD_UMIN:
Opc = X86ISD::ATOMUMIN64_DAG;
break;
case ISD::ATOMIC_SWAP:
Opc = X86ISD::ATOMSWAP64_DAG;
break;
}
ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
}
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD(N, Results, DAG);
}
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
case X86ISD::BSF: return "X86ISD::BSF";
case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FSRL: return "X86ISD::FSRL";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::CMPM: return "X86ISD::CMPM";
case X86ISD::CMPMU: return "X86ISD::CMPMU";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::UMAX: return "X86ISD::UMAX";
case X86ISD::UMIN: return "X86ISD::UMIN";
case X86ISD::SMAX: return "X86ISD::SMAX";
case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
case X86ISD::VSRA: return "X86ISD::VSRA";
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
case X86ISD::BZHI: return "X86ISD::BZHI";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
case X86ISD::SHUFP: return "X86ISD::SHUFP";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
}
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
Reloc::Model R = getTargetMachine().getRelocationModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
return false;
if (AM.BaseGV) {
unsigned GVFlags =
Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
// If a reference to this global requires an extra load, we can't fold it.
if (isGlobalStubReference(GVFlags))
return false;
// If BaseGV requires a register for the PIC base, we cannot also have a
// BaseReg specified.
if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
if ((M != CodeModel::Small || R != Reloc::Static) &&
Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
switch (AM.Scale) {
case 0:
case 1:
case 2:
case 4:
case 8:
// These scales always work.
break;
case 3:
case 5:
case 9:
// These scales are formed with basereg+scalereg. Only accept if there is
// no basereg yet.
if (AM.HasBaseReg)
return false;
break;
default: // Other stuff never works.
return false;
}
return true;
}
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
if (!isTypeLegal(EVT::getEVT(Ty1)))
return false;
assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
// Assuming the caller doesn't have a zeroext or signext return parameter,
// truncation all the way down to i1 is valid.
return true;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2))
return true;
if (Val.getOpcode() != ISD::LOAD)
return false;
if (!VT1.isSimple() || !VT1.isInteger() ||
!VT2.isSimple() || !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
default: break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
// X86 has 8, 16, and 32-bit zero-extending loads.
return true;
}
return false;
}
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
return false;
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
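// Encoding example: "addl %ebx, %eax" is 01 D8, whereas the i16 form
// "addw %bx, %ax" is 66 01 D8, one byte longer because of the 0x66
// operand-size prefix.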
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
if (!VT.isSimple())
return false;
MVT SVT = VT.getSimpleVT();
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
return false;
// FIXME: pshufb, blends, shifts.
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
isSHUFPMask(M, SVT) ||
isPSHUFDMask(M, SVT) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, SVT, Subtarget) ||
isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
if (!VT.isSimple())
return false;
MVT SVT = VT.getSimpleVT();
unsigned NumElts = SVT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
if (NumElts == 4 && SVT.is128BitVector()) {
return (isMOVLMask(Mask, SVT) ||
isCommutedMOVLMask(Mask, SVT, true) ||
isSHUFPMask(Mask, SVT) ||
isSHUFPMask(Mask, SVT, /* Commuted */ true));
}
return false;
}
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = MBB;
++I;
// For v = xbegin(), we generate
//
// thisMBB:
// xbegin sinkMBB
//
// mainMBB:
// eax = -1
//
// sinkMBB:
// v = eax
MachineBasicBlock *thisMBB = MBB;
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// xbegin sinkMBB
// # fallthrough to mainMBB
// # abort resumes in sinkMBB
BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(sinkMBB);
// mainMBB:
// EAX = -1
BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
// EAX is live into the sinkMBB
sinkMBB->addLiveIn(X86::EAX);
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::EAX);
MI->eraseFromParent();
return sinkMBB;
}
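// Note on the RTM semantics relied on above: execution falls through
// XBEGIN into mainMBB when the transaction starts, and EAX is set there
// to -1 (_XBEGIN_STARTED). On an abort the hardware writes an abort
// status into EAX and resumes at the fallback label, i.e. in sinkMBB,
// so sinkMBB can read the result out of EAX either way.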
// Get CMPXCHG opcode for the specified data type.
static unsigned getCmpXChgOpcode(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::i8: return X86::LCMPXCHG8;
case MVT::i16: return X86::LCMPXCHG16;
case MVT::i32: return X86::LCMPXCHG32;
case MVT::i64: return X86::LCMPXCHG64;
default:
break;
}
llvm_unreachable("Invalid operand size!");
}
// Get LOAD opcode for the specified data type.
static unsigned getLoadOpcode(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::i8: return X86::MOV8rm;
case MVT::i16: return X86::MOV16rm;
case MVT::i32: return X86::MOV32rm;
case MVT::i64: return X86::MOV64rm;
default:
break;
}
llvm_unreachable("Invalid operand size!");
}
// Get the opcode of the non-atomic operation from the specified atomic
// instruction.
static unsigned getNonAtomicOpcode(unsigned Opc) {
switch (Opc) {
case X86::ATOMAND8: return X86::AND8rr;
case X86::ATOMAND16: return X86::AND16rr;
case X86::ATOMAND32: return X86::AND32rr;
case X86::ATOMAND64: return X86::AND64rr;
case X86::ATOMOR8: return X86::OR8rr;
case X86::ATOMOR16: return X86::OR16rr;
case X86::ATOMOR32: return X86::OR32rr;
case X86::ATOMOR64: return X86::OR64rr;
case X86::ATOMXOR8: return X86::XOR8rr;
case X86::ATOMXOR16: return X86::XOR16rr;
case X86::ATOMXOR32: return X86::XOR32rr;
case X86::ATOMXOR64: return X86::XOR64rr;
}
llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get the opcode of the non-atomic operation, together with the extra
// opcode it needs, from the specified atomic instruction.
static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
unsigned &ExtraOpc) {
switch (Opc) {
case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
}
llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get the low- and high-half opcodes of the non-atomic operation from the
// specified atomic instruction for a 64-bit data type on a 32-bit target.
static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
switch (Opc) {
case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
}
llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get the low- and high-half opcodes of the non-atomic operation, together
// with the extra opcode it needs, from the specified atomic instruction for
// a 64-bit data type on a 32-bit target.
static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
unsigned &HiOpc,
unsigned &ExtraOpc) {
switch (Opc) {
case X86::ATOMNAND6432:
ExtraOpc = X86::NOT32r;
HiOpc = X86::AND32rr;
return X86::AND32rr;
}
llvm_unreachable("Unhandled atomic-load-op opcode!");
}
// Get pseudo CMOV opcode from the specified data type.
static unsigned getPseudoCMOVOpc(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::i8: return X86::CMOV_GR8;
case MVT::i16: return X86::CMOV_GR16;
case MVT::i32: return X86::CMOV_GR32;
default:
break;
}
llvm_unreachable("Unknown CMOV opcode!");
}
// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
// They will be translated into a spin-loop or compare-exchange loop from
//
// ...
// dst = atomic-fetch-op MI.addr, MI.val
// ...
//
// to
//
// ...
// t1 = LOAD MI.addr
// loop:
// t4 = phi(t1, t3 / loop)
// t2 = OP MI.val, t4
// EAX = t4
// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
// t3 = EAX
// JNE loop
// sink:
// dst = t3
// ...
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = MBB;
++I;
assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
"Expected atomic-load-op to have one memoperand");
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
unsigned DstReg, SrcReg;
unsigned MemOpndSlot;
unsigned CurOp = 0;
DstReg = MI->getOperand(CurOp++).getReg();
MemOpndSlot = CurOp;
CurOp += X86::AddrNumOperands;
SrcReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
MVT::SimpleValueType VT = *RC->vt_begin();
unsigned t1 = MRI.createVirtualRegister(RC);
unsigned t2 = MRI.createVirtualRegister(RC);
unsigned t3 = MRI.createVirtualRegister(RC);
unsigned t4 = MRI.createVirtualRegister(RC);
unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
unsigned LOADOpc = getLoadOpcode(VT);
// For the atomic load-arith operator, we generate
//
// thisMBB:
// t1 = LOAD [MI.addr]
// mainMBB:
// t4 = phi(t1 / thisMBB, t3 / mainMBB)
// t2 = OP MI.val, t4
// EAX = t4
// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
// t3 = EAX
// JNE mainMBB
// sinkMBB:
// dst = t3
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
unsigned flags = (*MMOI)->getFlags();
flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
MachineMemOperand *MMO =
MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
(*MMOI)->getSize(),
(*MMOI)->getBaseAlignment(),
(*MMOI)->getTBAAInfo(),
(*MMOI)->getRanges());
MIB.addMemOperand(MMO);
}
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
// Add a PHI.
MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
.addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic-load-op opcode!");
case X86::ATOMAND8:
case X86::ATOMAND16:
case X86::ATOMAND32:
case X86::ATOMAND64:
case X86::ATOMOR8:
case X86::ATOMOR16:
case X86::ATOMOR32:
case X86::ATOMOR64:
case X86::ATOMXOR8:
case X86::ATOMXOR16:
case X86::ATOMXOR32:
case X86::ATOMXOR64: {
unsigned ARITHOpc = getNonAtomicOpcode(Opc);
BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
.addReg(t4);
break;
}
case X86::ATOMNAND8:
case X86::ATOMNAND16:
case X86::ATOMNAND32:
case X86::ATOMNAND64: {
unsigned Tmp = MRI.createVirtualRegister(RC);
unsigned NOTOpc;
unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
.addReg(t4);
BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
break;
}
case X86::ATOMMAX8:
case X86::ATOMMAX16:
case X86::ATOMMAX32:
case X86::ATOMMAX64:
case X86::ATOMMIN8:
case X86::ATOMMIN16:
case X86::ATOMMIN32:
case X86::ATOMMIN64:
case X86::ATOMUMAX8:
case X86::ATOMUMAX16:
case X86::ATOMUMAX32:
case X86::ATOMUMAX64:
case X86::ATOMUMIN8:
case X86::ATOMUMIN16:
case X86::ATOMUMIN32:
case X86::ATOMUMIN64: {
unsigned CMPOpc;
unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
BuildMI(mainMBB, DL, TII->get(CMPOpc))
.addReg(SrcReg)
.addReg(t4);
if (Subtarget->hasCMov()) {
if (VT != MVT::i8) {
// Native support
BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
.addReg(SrcReg)
.addReg(t4);
} else {
// Promote i8 to i32 to use CMOV32
const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
const TargetRegisterClass *RC32 =
TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
unsigned AccReg32 = MRI.createVirtualRegister(RC32);
unsigned Tmp = MRI.createVirtualRegister(RC32);
unsigned Undef = MRI.createVirtualRegister(RC32);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
.addReg(Undef)
.addReg(SrcReg)
.addImm(X86::sub_8bit);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
.addReg(Undef)
.addReg(t4)
.addImm(X86::sub_8bit);
BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
.addReg(SrcReg32)
.addReg(AccReg32);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
.addReg(Tmp, 0, X86::sub_8bit);
}
} else {
// Use pseudo select and lower them.
assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
"Invalid atomic-load-op transformation!");
unsigned SelOpc = getPseudoCMOVOpc(VT);
X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
.addReg(SrcReg).addReg(t4)
.addImm(CC);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
// Replace the original PHI node as mainMBB is changed after CMOV
// lowering.
BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
.addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
Phi->eraseFromParent();
}
break;
}
}
// Copy the accumulator t4 into the physical register PhyReg.
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
.addReg(t4);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
MIB.addReg(t2);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Copy PhyReg back into the virtual register t3.
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
.addReg(PhyReg);
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstReg)
.addReg(t3);
MI->eraseFromParent();
return sinkMBB;
}
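// As a concrete sketch of the scheme above, ATOMNAND32 with its value in
// a register "src" expands roughly to:
//
//   t1 = MOV32rm [addr]
// loop:
//   t4  = PHI(t1 / entry, t3 / loop)
//   tmp = AND32rr src, t4
//   t2  = NOT32r tmp
//   EAX = t4
//   LCMPXCHG32 [addr], t2        // EAX implicitly used & defined
//   t3  = EAX
//   JNE loop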
// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
// instructions. They will be translated into a spin-loop or compare-exchange
// loop from
//
// ...
// dst = atomic-fetch-op MI.addr, MI.val
// ...
//
// to
//
// ...
// t1L = LOAD [MI.addr + 0]
// t1H = LOAD [MI.addr + 4]
// loop:
// t4L = phi(t1L, t3L / loop)
// t4H = phi(t1H, t3H / loop)
// t2L = OP MI.val.lo, t4L
// t2H = OP MI.val.hi, t4H
// EAX = t4L
// EDX = t4H
// EBX = t2L
// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
// t3L = EAX
// t3H = EDX
// JNE loop
// sink:
// dstL = t3L
// dstH = t3H
// ...
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = MBB;
++I;
assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
"Expected atomic-load-op32 to have one memoperand");
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
unsigned DstLoReg, DstHiReg;
unsigned SrcLoReg, SrcHiReg;
unsigned MemOpndSlot;
unsigned CurOp = 0;
DstLoReg = MI->getOperand(CurOp++).getReg();
DstHiReg = MI->getOperand(CurOp++).getReg();
MemOpndSlot = CurOp;
CurOp += X86::AddrNumOperands;
SrcLoReg = MI->getOperand(CurOp++).getReg();
SrcHiReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = &X86::GR32RegClass;
const TargetRegisterClass *RC8 = &X86::GR8RegClass;
unsigned t1L = MRI.createVirtualRegister(RC);
unsigned t1H = MRI.createVirtualRegister(RC);
unsigned t2L = MRI.createVirtualRegister(RC);
unsigned t2H = MRI.createVirtualRegister(RC);
unsigned t3L = MRI.createVirtualRegister(RC);
unsigned t3H = MRI.createVirtualRegister(RC);
unsigned t4L = MRI.createVirtualRegister(RC);
unsigned t4H = MRI.createVirtualRegister(RC);
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
// For the atomic load-arith operator, we generate
//
// thisMBB:
// t1L = LOAD [MI.addr + 0]
// t1H = LOAD [MI.addr + 4]
// mainMBB:
// t4L = phi(t1L / thisMBB, t3L / mainMBB)
// t4H = phi(t1H / thisMBB, t3H / mainMBB)
// t2L = OP MI.val.lo, t4L
// t2H = OP MI.val.hi, t4H
// EBX = t2L
// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
// t3L = EAX
// t3H = EDX
// JNE loop
// sinkMBB:
// dstL = t3L
// dstH = t3H
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// Lo
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
unsigned flags = (*MMOI)->getFlags();
flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
MachineMemOperand *MMO =
MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
(*MMOI)->getSize(),
(*MMOI)->getBaseAlignment(),
(*MMOI)->getTBAAInfo(),
(*MMOI)->getRanges());
MIB.addMemOperand(MMO);
}
MachineInstr *LowMI = MIB;
// Hi
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp) {
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
} else {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
}
MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
// Add PHIs.
MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
.addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
.addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
case X86::ATOMAND6432:
case X86::ATOMOR6432:
case X86::ATOMXOR6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
.addReg(SrcLoReg);
BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
.addReg(SrcHiReg);
break;
}
case X86::ATOMNAND6432: {
unsigned HiOpc, NOTOpc;
unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
unsigned TmpL = MRI.createVirtualRegister(RC);
unsigned TmpH = MRI.createVirtualRegister(RC);
BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
.addReg(t4L);
BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
.addReg(t4H);
BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
break;
}
case X86::ATOMMAX6432:
case X86::ATOMMIN6432:
case X86::ATOMUMAX6432:
case X86::ATOMUMIN6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
unsigned cL = MRI.createVirtualRegister(RC8);
unsigned cH = MRI.createVirtualRegister(RC8);
unsigned cL32 = MRI.createVirtualRegister(RC);
unsigned cH32 = MRI.createVirtualRegister(RC);
unsigned cc = MRI.createVirtualRegister(RC);
// cl := cmp src_lo, lo
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
.addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
// ch := cmp src_hi, hi
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
.addReg(SrcHiReg).addReg(t4H);
BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
// cc := (src_hi == hi) ? cl : ch
if (Subtarget->hasCMov()) {
BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
.addReg(cH32).addReg(cL32);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
.addReg(cH32).addReg(cL32)
.addImm(X86::COND_E);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
}
BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
if (Subtarget->hasCMov()) {
BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
.addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
.addReg(SrcHiReg).addReg(t4H);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
.addReg(SrcLoReg).addReg(t4L)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
// As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
// 2nd CMOV lowering.
mainMBB->addLiveIn(X86::EFLAGS);
MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
.addReg(SrcHiReg).addReg(t4H)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
// Replace the original PHI node as mainMBB is changed after CMOV
// lowering.
BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
.addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
.addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
PhiL->eraseFromParent();
PhiH->eraseFromParent();
}
break;
}
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
break;
}
}
// Copy EDX:EAX from t4H:t4L
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
// Copy ECX:EBX from t2H:t2L
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
if (NewMO.isReg())
NewMO.setIsKill(false);
MIB.addOperand(NewMO);
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Copy EDX:EAX back to t3H:t3L
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstLoReg)
.addReg(t3L);
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstHiReg)
.addReg(t3H);
MI->eraseFromParent();
return sinkMBB;
}
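// Note: LCMPXCHG8B is the only 64-bit-wide atomic primitive available on
// 32-bit x86, so every 64-bit atomic RMW above, including the min/max
// variants built from SETcc/CMOV over the two halves, funnels into a
// CMPXCHG8B retry loop with EDX:EAX holding the expected value and
// ECX:EBX the replacement.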
// FIXME: When we get size-specific XMM0 registers, i.e. XMM0_V16I8 or
// XMM0_V32I8 in AVX, all of this code can be replaced with patterns in
// the .td file.
static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII) {
unsigned Opc;
switch (MI->getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break;
case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break;
case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break;
case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break;
case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break;
case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break;
case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break;
case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break;
}
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
unsigned NumArgs = MI->getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
if (MI->hasOneMemOperand())
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
BuildMI(*BB, MI, dl,
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
return BB;
}
// FIXME: Custom handling because TableGen doesn't support multiple implicit
// defs in an instruction pattern
static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII) {
unsigned Opc;
switch (MI->getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break;
case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break;
case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break;
case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break;
case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break;
case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break;
case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break;
case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break;
}
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
unsigned NumArgs = MI->getNumOperands(); // operand 0 is the result; skipped below
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
if (MI->hasOneMemOperand())
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
BuildMI(*BB, MI, dl,
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::ECX);
MI->eraseFromParent();
return BB;
}
static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII,
const X86Subtarget* Subtarget) {
DebugLoc dl = MI->getDebugLoc();
// Address into RAX/EAX, other two args into ECX, EDX.
unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.addOperand(MI->getOperand(i));
unsigned ValOps = X86::AddrNumOperands;
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI->getOperand(ValOps).getReg());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
.addReg(MI->getOperand(ValOps+1).getReg());
// The instruction doesn't actually take any operands though.
BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
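// MONITOR's operands are all implicit: the linear address to arm comes
// from RAX/EAX and the extension/hint words from ECX and EDX, so the
// pseudo is lowered as three register setups followed by a bare
// MONITORrrr with no explicit operands.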
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
// 0 ) Output : destination address (reg)
// 1-5) Input : va_list address (addr, i64mem)
// 6 ) ArgSize : Size (in bytes) of vararg type
// 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
// 8 ) Align : Alignment of type
// 9 ) EFLAGS (implicit-def)
assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
unsigned DestReg = MI->getOperand(0).getReg();
MachineOperand &Base = MI->getOperand(1);
MachineOperand &Scale = MI->getOperand(2);
MachineOperand &Index = MI->getOperand(3);
MachineOperand &Disp = MI->getOperand(4);
MachineOperand &Segment = MI->getOperand(5);
unsigned ArgSize = MI->getOperand(6).getImm();
unsigned ArgMode = MI->getOperand(7).getImm();
unsigned Align = MI->getOperand(8).getImm();
// Memory Reference
assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
DebugLoc DL = MI->getDebugLoc();
// struct va_list {
// i32 gp_offset
// i32 fp_offset
// i64 overflow_area (address)
// i64 reg_save_area (address)
// }
// sizeof(va_list) = 24
// alignment(va_list) = 8
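// A rough C sketch of the selection emitted below for an integer argument
// (FP arguments use fp_offset and 16-byte reg_save_area slots instead):
//
//   if (gp_offset < 6*8 + 8 - aligned_size) {       // room in reg area?
//     addr = reg_save_area + gp_offset;
//     gp_offset += 8;
//   } else {
//     addr = overflow_area;                // aligned first if Align > 8
//     overflow_area += aligned_size;       // size rounded up to 8
//   }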
unsigned TotalNumIntRegs = 6;
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
unsigned MaxOffset = TotalNumIntRegs * 8 +
(UseFPOffset ? TotalNumXMMRegs * 16 : 0);
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
bool NeedsAlign = (Align > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
MachineBasicBlock *offsetMBB;
MachineBasicBlock *endMBB;
unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
unsigned OffsetReg = 0;
if (!UseGPOffset && !UseFPOffset) {
// If we only pull from the overflow region, we don't need a branch and
// control flow is unchanged.
OffsetDestReg = 0; // unused
OverflowDestReg = DestReg;
offsetMBB = NULL;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
// First emit code to check if gp_offset (or fp_offset) is below the bound.
// If so, pull the argument from reg_save_area. (branch to offsetMBB)
// If not, pull from overflow_area. (branch to overflowMBB)
//
//        thisMBB
//        /     \
//  offsetMBB  overflowMBB
//        \     /
//        endMBB
// Registers for the PHI in endMBB
OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = MBB;
++MBBIter;
// Insert the new basic blocks
MF->insert(MBBIter, offsetMBB);
MF->insert(MBBIter, overflowMBB);
MF->insert(MBBIter, endMBB);
// Transfer the remainder of MBB and its successor edges to endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
llvm::next(MachineBasicBlock::iterator(MI)),
thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Make offsetMBB and overflowMBB successors of thisMBB
thisMBB->addSuccessor(offsetMBB);
thisMBB->addSuccessor(overflowMBB);
// endMBB is a successor of both offsetMBB and overflowMBB
offsetMBB->addSuccessor(endMBB);
overflowMBB->addSuccessor(endMBB);
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
.addMBB(overflowMBB);
}
// In offsetMBB, emit code to use the reg_save_area.
if (offsetMBB) {
assert(OffsetReg != 0);
// Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 16)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
// Compute the offset for the next argument
unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.addOperand(Segment)
.addReg(NextOffsetReg)
.setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
.addMBB(endMBB);
}
//
// Emit code to use overflow area
//
// Load the overflow_area address into a register.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 8)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
if (NeedsAlign) {
// Align the overflow address
assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
.addReg(OverflowAddrReg)
.addImm(Align-1);
BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Align-1));
} else {
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 8)
.addOperand(Segment)
.addReg(NextAddrReg)
.setMemRefs(MMOBegin, MMOEnd);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
BuildMI(*endMBB, endMBB->begin(), DL,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
}
// Erase the pseudo instruction
MI->eraseFromParent();
return endMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
// Emit code to save XMM registers to the stack. The ABI says that the
// number of registers to save is given in %al, so it's theoretically
// possible to do an indirect jump trick to avoid saving all of them;
// however, this code takes a simpler approach and just executes all
// of the stores if %al is non-zero. It's less code, and it's probably
// easier on the hardware branch predictor, and stores aren't all that
// expensive anyway.
// Create the new basic blocks. One block contains all the XMM stores,
// and one block is the final destination regardless of whether any
// stores were performed.
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *F = MBB->getParent();
MachineFunction::iterator MBBIter = MBB;
++MBBIter;
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
// Transfer the remainder of MBB and its successor edges to EndMBB.
EndMBB->splice(EndMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(XMMSaveMBB);
// The XMMSaveMBB will fall through to the end block.
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
if (!Subtarget->isTargetWin64()) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
MBB->addSuccessor(EndMBB);
}
+ // Make sure the last operand is EFLAGS, which gets clobbered by the branch
+ // that was just emitted, but clearly shouldn't be "saved".
+ assert((MI->getNumOperands() <= 3 ||
+ !MI->getOperand(MI->getNumOperands() - 1).isReg() ||
+ MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
+ && "Expected last argument to be EFLAGS");
unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
- for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(/*Scale=*/1)
.addReg(/*IndexReg=*/0)
.addImm(/*Disp=*/Offset)
.addReg(/*Segment=*/0)
.addReg(MI->getOperand(i).getReg())
.addMemOperand(MMO);
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
return EndMBB;
}
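// Background on the %al test above: in the SysV AMD64 calling convention
// the caller of a variadic function passes the number of vector registers
// actually used in %al, so when %al is zero the callee may skip all eight
// XMM spills. Win64 has no such convention, hence the isTargetWin64()
// special case.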
// The EFLAGS operand of SelectItr might be missing a kill marker
// because there were multiple uses of EFLAGS, and ISel didn't know
// which to mark. Figure out whether SelectItr should have had a
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock* BB,
const TargetRegisterInfo* TRI) {
// Scan forward through BB for a use/def of EFLAGS.
MachineBasicBlock::iterator miI(llvm::next(SelectItr));
for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
const MachineInstr& mi = *miI;
if (mi.readsRegister(X86::EFLAGS))
return false;
if (mi.definesRegister(X86::EFLAGS))
break; // Should have kill-flag - update below.
}
// If we hit the end of the block, check whether EFLAGS is live into a
// successor.
if (miI == BB->end()) {
for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
sEnd = BB->succ_end();
sItr != sEnd; ++sItr) {
MachineBasicBlock* succ = *sItr;
if (succ->isLiveIn(X86::EFLAGS))
return false;
}
}
// We found a def, or hit the end of the basic block and EFLAGS wasn't live
// out. SelectMI should have a kill flag on EFLAGS.
SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
return true;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
if (!MI->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
unsigned Opc =
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
copy0MBB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
}
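// Sketch of the result: a pseudo CMOV such as "t2 = CMOV_GR32 f, t, CC"
// becomes
//
//   thisMBB:   jCC sinkMBB                 // condition true: keep t
//   copy0MBB:                              // fallthrough: supplies f
//   sinkMBB:   t2 = PHI [f, copy0MBB], [t, thisMBB]
//
// which is exactly the shape the EmitAtomicLoadArith callers rebuild
// their PHIs around.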
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
bool Is64Bit) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(getTargetMachine().Options.EnableSegmentedStacks);
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
// BB:
// ... [Till the alloca]
// If stacklet is not large enough, jump to mallocMBB
//
// bumpMBB:
// Allocate by subtracting from RSP
// Jump to continueMBB
//
// mallocMBB:
// Allocate by call to runtime
//
// continueMBB:
// ...
// [rest of original BB]
//
MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
physSPReg = Is64Bit ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = BB;
++MBBIter;
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
MF->insert(MBBIter, continueMBB);
continueMBB->splice(continueMBB->begin(), BB, llvm::next
(MachineBasicBlock::iterator(MI)), BB->end());
continueMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(Is64Bit ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
BB->addSuccessor(mallocMBB);
mallocMBB->addSuccessor(continueMBB);
bumpMBB->addSuccessor(continueMBB);
// Take care of the PHI nodes.
BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
MI->getOperand(0).getReg())
.addReg(mallocPtrVReg).addMBB(mallocMBB)
.addReg(bumpSPPtrVReg).addMBB(bumpMBB);
// Delete the original pseudo instruction.
MI->eraseFromParent();
// And we're done.
return continueMBB;
}
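// Note on the magic TLS offsets above: with segmented stacks the current
// stack limit lives in thread-local storage (%fs:0x70 on x86-64, %gs:0x30
// on i386 here). The bump path simply lowers RSP/ESP because the stacklet
// still fits; only when "SP - size" would fall below the stored limit does
// the code call libgcc's __morestack_allocate_stack_space for heap-backed
// space.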
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetEnvMacho());
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is the implicit def of ESP.
if (Subtarget->isTargetWin64()) {
if (Subtarget->isTargetCygMing()) {
// ___chkstk(Mingw64):
// Clobbers R10, R11, RAX and EFLAGS.
// Updates RSP.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("___chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::RSP, RegState::Implicit)
.addReg(X86::RAX, RegState::Define | RegState::Implicit)
.addReg(X86::RSP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// __chkstk(MSVCRT): does not update stack pointer.
// Clobbers R10, R11 and EFLAGS.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("__chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// RAX has the offset to be subtracted from RSP.
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
} else {
const char *StackProbeSymbol =
Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
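// Why a call rather than a plain SUB from the stack pointer: Windows
// commits stack pages lazily behind a guard page, so any adjustment that
// may exceed a page must touch each page in order. __chkstk/___chkstk do
// that probing; as noted above, the MSVCRT flavor leaves RSP alone, so the
// explicit SUB64rr applies the adjustment afterwards.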
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const {
// This is pretty easy. We take the value we received from our load of the
// relocation, stick it in either RDI (x86-64) or EAX, and do an indirect
// call. The return value will then be in the normal return register.
const X86InstrInfo *TII
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
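// For reference, the 64-bit path corresponds to Darwin's TLV access
// sequence, roughly:
//
//   movq  _var@TLVP(%rip), %rdi    // load the TLV descriptor address
//   callq *(%rdi)                  // call the descriptor's accessor
//   // address of the thread-local variable is returned in %rax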
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = MBB;
++I;
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
unsigned DstReg;
unsigned MemOpndSlot = 0;
unsigned CurOp = 0;
DstReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(RC->hasType(MVT::i32) && "Invalid destination!");
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
MemOpndSlot = CurOp;
MVT PVT = getPointerTy();
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
// thisMBB:
// buf[LabelOffset] = restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
// v_main = 0
//
// sinkMBB:
// v = phi(main, restore)
//
// restoreMBB:
// v_restore = 1
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
Reloc::Model RM = getTargetMachine().getRelocationModel();
bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
(RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
// Prepare IP either in reg or imm.
if (!UseImmLabel) {
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget->is64Bit()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addMBB(restoreMBB)
.addReg(0);
} else {
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
.addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
.addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.addOperand(MI->getOperand(MemOpndSlot + i));
}
if (!UseImmLabel)
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
// mainMBB:
// v_main = 0
BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB:
BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI->eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
MVT PVT = getPointerTy();
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
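// The jmp_buf layout assumed by the offsets above, in pointer-sized
// slots: buf[0] = frame pointer, buf[1] = resume IP (LabelOffset),
// buf[2] = stack pointer (SPOffset).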
// Reload FP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
MIB.addOperand(MI->getOperand(i));
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI->getOperand(i), LabelOffset);
else
MIB.addOperand(MI->getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI->getOperand(i), SPOffset);
else
MIB.addOperand(MI->getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Jump
BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
MI->eraseFromParent();
return MBB;
}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::SEG_ALLOCA_32:
return EmitLoweredSegAlloca(MI, BB, false);
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB, true);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
case X86::CMOV_V8F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
return EmitLoweredSelect(MI, BB);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM:
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the high part to be round to zero...
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
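// 0xC7F sets the rounding-control field (bits 11:10) to 11b, i.e.
// "round toward zero", and keeps all exception mask bits (5:0) set.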
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI->getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
}
X86AddressMode AM;
MachineOperand &Op = MI->getOperand(0);
if (Op.isReg()) {
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
if (Op.isImm())
AM.Scale = Op.getImm();
Op = MI->getOperand(2);
if (Op.isImm())
AM.IndexReg = Op.getImm();
Op = MI->getOperand(3);
if (Op.isGlobal()) {
AM.GV = Op.getGlobal();
} else {
AM.Disp = Op.getImm();
}
addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
.addReg(MI->getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
case X86::VPCMPISTRIREG:
case X86::PCMPISTRIMEM:
case X86::VPCMPISTRIMEM:
case X86::PCMPESTRIREG:
case X86::VPCMPESTRIREG:
case X86::PCMPESTRIMEM:
case X86::VPCMPESTRIMEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget);
// xbegin
case X86::XBEGIN:
return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo());
// Atomic Lowering.
case X86::ATOMAND8:
case X86::ATOMAND16:
case X86::ATOMAND32:
case X86::ATOMAND64:
// Fall through
case X86::ATOMOR8:
case X86::ATOMOR16:
case X86::ATOMOR32:
case X86::ATOMOR64:
// Fall through
case X86::ATOMXOR8:
case X86::ATOMXOR16:
case X86::ATOMXOR32:
case X86::ATOMXOR64:
// Fall through
case X86::ATOMNAND8:
case X86::ATOMNAND16:
case X86::ATOMNAND32:
case X86::ATOMNAND64:
// Fall through
case X86::ATOMMAX8:
case X86::ATOMMAX16:
case X86::ATOMMAX32:
case X86::ATOMMAX64:
// Fall through
case X86::ATOMMIN8:
case X86::ATOMMIN16:
case X86::ATOMMIN32:
case X86::ATOMMIN64:
// Fall through
case X86::ATOMUMAX8:
case X86::ATOMUMAX16:
case X86::ATOMUMAX32:
case X86::ATOMUMAX64:
// Fall through
case X86::ATOMUMIN8:
case X86::ATOMUMIN16:
case X86::ATOMUMIN32:
case X86::ATOMUMIN64:
return EmitAtomicLoadArith(MI, BB);
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
case X86::ATOMOR6432:
case X86::ATOMXOR6432:
case X86::ATOMNAND6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432:
case X86::ATOMMAX6432:
case X86::ATOMMIN6432:
case X86::ATOMUMAX6432:
case X86::ATOMUMIN6432:
case X86::ATOMSWAP6432:
return EmitAtomicLoadArith6432(MI, BB);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
case X86::EH_SjLj_SetJmp32:
case X86::EH_SjLj_SetJmp64:
return emitEHSjLjSetJmp(MI, BB);
case X86::EH_SjLj_LongJmp32:
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
switch (Opc) {
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
// Fallthrough
case X86ISD::SETCC:
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned NumLoBits = 0;
switch (IntId) {
default: break;
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse2_pmovmskb_128:
case Intrinsic::x86_avx2_pmovmskb: {
// High bits of movmskp{s|d}, pmovmskb are known zero.
switch (IntId) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break;
}
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
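// E.g. movmskps produces only a 4-bit mask, so in an i32 result the
// upper 28 bits are known to be zero.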
}
break;
}
}
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
return Op.getValueType().getScalarType().getSizeInBits();
// Fallback case.
return 1;
}
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
const GlobalValue* &GA,
int64_t &Offset) const {
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
return true;
}
}
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
/// same as extracting the high 128-bit part of a 256-bit vector and then
/// inserting the result into the low part of a new 256-bit vector
static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
// vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
SVOp->getMaskElt(j) >= 0)
return false;
return true;
}
/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
/// same as extracting the low 128-bit part of a 256-bit vector and then
/// inserting the result into the high part of a new 256-bit vector
static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
// vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
SVOp->getMaskElt(j) >= 0)
return false;
return true;
}
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget* Subtarget) {
SDLoc dl(N);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
V2.getOpcode() == ISD::CONCAT_VECTORS) {
//
// 0,0,0,...
// |
// V UNDEF BUILD_VECTOR UNDEF
// \ / \ /
// CONCAT_VECTOR CONCAT_VECTOR
// \ /
// \ /
// RESULT: V + zero extended
//
if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
V2.getOperand(1).getOpcode() != ISD::UNDEF ||
V1.getOperand(1).getOpcode() != ISD::UNDEF)
return SDValue();
if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
return SDValue();
// To match the shuffle mask, the first half of the mask should
// be exactly the first vector, and all the rest a splat with the
// first element of the second one.
for (unsigned i = 0; i != NumElems/2; ++i)
if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
return SDValue();
// If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
if (Ld->hasNUsesOfValue(1, 0)) {
SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
array_lengthof(Ops),
Ld->getMemoryVT(),
Ld->getPointerInfo(),
Ld->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
// Make sure the newly-created LOAD is in the same position as Ld in
// terms of dependency. We create a TokenFactor for Ld and ResNode,
// and update uses of Ld's output chain to use the TokenFactor.
if (Ld->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
SDValue(ResNode.getNode(), 1));
}
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
}
// Emit a zeroed vector and insert the desired subvector into its
// lower half.
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);
return DCI.CombineTo(N, InsV);
}
//===--------------------------------------------------------------------===//
// Combine some shuffles into subvector extracts and inserts:
//
// vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
if (isShuffleHigh128VectorInsertLow(SVOp)) {
SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
return DCI.CombineTo(N, InsV);
}
// vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
if (isShuffleLow128VectorInsertHigh(SVOp)) {
SDValue V = Extract128BitVector(V1, 0, DAG, dl);
SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
return DCI.CombineTo(N, InsV);
}
return SDValue();
}
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128-bit wide vectors from here on.
if (!VT.is128BitVector())
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
}
/// PerformTruncateCombine - Converts a truncate operation into
/// a sequence of vector shuffle operations.
/// This is possible when we truncate a 256-bit vector to a 128-bit vector.
static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
return SDValue();
}
/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
/// specific shuffle of a load can be folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been custom lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
EVT VT = InVec.getValueType();
bool HasShuffleIntoBitcast = false;
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
HasShuffleIntoBitcast = true;
}
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask,
UnaryShuffle))
return SDValue();
// Select the input vector, guarding against an out-of-range extract index.
unsigned NumElems = VT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
: InVec.getOperand(1);
// If both inputs to the shuffle are the same operand, then allow 2 uses.
unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
if (LdNode.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
return SDValue();
AllowedUses = 1; // only allow 1 load use if we have a bitcast
LdNode = LdNode.getOperand(0);
}
if (!ISD::isNormalLoad(LdNode.getNode()))
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
if (HasShuffleIntoBitcast) {
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NewAlign = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
return SDValue();
}
// All checks match so transform back to vector_shuffle so that DAG combiner
// can finish the job
SDLoc dl(N);
// Create a shuffle node, taking into account the case that it's a unary shuffle.
SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
InVec.getOperand(0), Shuffle,
&ShuffleMask[0]);
Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
/// Extract one bit from mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) {
SDValue Vec = N->getOperand(0);
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = N->getOperand(1);
MVT EltVT = N->getSimpleValueType(0);
assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) &&
"Unexpected operands in ExtractBitFromMaskVector");
// variable index
if (!isa<ConstantSDNode>(Idx)) {
MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
ExtVT.getVectorElementType(), Ext);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits());
unsigned MaxShift = VecVT.getSizeInBits() - 1;
Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec);
Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec,
DAG.getConstant(MaxShift - IdxVal, ScalarVT));
Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec,
DAG.getConstant(MaxShift, ScalarVT));
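// Vec now holds (Mask << (MaxShift - IdxVal)) >> MaxShift, i.e. the
// requested bit isolated in bit 0.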
if (VecVT == MVT::v16i1) {
Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec);
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec);
}
return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec);
}
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
if (NewOp.getNode())
return NewOp;
SDValue InputVector = N->getOperand(0);
if (InputVector.getValueType().getVectorElementType() == MVT::i1 &&
!DCI.isBeforeLegalize())
return ExtractBitFromMaskVector(N, DAG);
// Detect whether we are trying to convert from mmx to i32 and the bitcast
// from mmx to v2i32 has a single usage.
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
N->getValueType(0),
InputVector.getNode()->getOperand(0));
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (InputVector.getValueType() != MVT::v4i32)
return SDValue();
// Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
// single use which is a sign-extend or zero-extend, and all elements are
// used.
SmallVector<SDNode *, 4> Uses;
unsigned ExtractedElements = 0;
for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
if (UI.getUse().getResNo() != InputVector.getResNo())
return SDValue();
SDNode *Extract = *UI;
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
if (Extract->getValueType(0) != MVT::i32)
return SDValue();
if (!Extract->hasOneUse())
return SDValue();
if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
if (!isa<ConstantSDNode>(Extract->getOperand(1)))
return SDValue();
// Record which element was extracted.
ExtractedElements |=
1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
Uses.push_back(Extract);
}
// If not all the elements were used, this may not be worthwhile.
if (ExtractedElements != 15)
return SDValue();
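// (15 == 0b1111: all four lanes of the v4i32 vector were extracted.)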
// Ok, we've now decided to do the transformation.
SDLoc dl(InputVector);
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
MachinePointerInfo(), false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
// Compute the element's address.
SDValue Idx = Extract->getOperand(1);
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
StackPtr, OffsetVal);
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
ScalarAddr, MachinePointerInfo(),
false, false, false, 0);
// Replace the extract with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
}
// The replacement was made in place; don't return anything.
return SDValue();
}
/// \brief Matches a VSELECT onto min/max or returns 0 if the node doesn't match.
static std::pair<unsigned, bool>
matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const X86Subtarget *Subtarget) {
if (!VT.isVector())
return std::make_pair(0, false);
bool NeedSplit = false;
switch (VT.getSimpleVT().SimpleTy) {
default: return std::make_pair(0, false);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
if (!Subtarget->hasAVX2())
NeedSplit = true;
if (!Subtarget->hasAVX())
return std::make_pair(0, false);
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
if (!Subtarget->hasSSE2())
return std::make_pair(0, false);
}
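// For the 256-bit types above, AVX1 provides no integer min/max
// instructions, so NeedSplit requests lowering as two 128-bit halves.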
// SSE2 has only a small subset of the operations.
bool hasUnsigned = Subtarget->hasSSE41() ||
(Subtarget->hasSSE2() && VT == MVT::v16i8);
bool hasSigned = Subtarget->hasSSE41() ||
(Subtarget->hasSSE2() && VT == MVT::v8i16);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opc = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
case ISD::SETULE:
Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETLT:
case ISD::SETLE:
Opc = hasSigned ? X86ISD::SMIN : 0; break;
case ISD::SETGT:
case ISD::SETGE:
Opc = hasSigned ? X86ISD::SMAX : 0; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETULT:
case ISD::SETULE:
Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETLT:
case ISD::SETLE:
Opc = hasSigned ? X86ISD::SMAX : 0; break;
case ISD::SETGT:
case ISD::SETGE:
Opc = hasSigned ? X86ISD::SMIN : 0; break;
}
}
return std::make_pair(Opc, NeedSplit);
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && TLI.isTypeLegal(VT) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
}
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
EVT CondVT = Cond.getValueType();
if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on AVX-512. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use the AVX instruction.
// The same applies to all 128- and 256-bit vectors of i8 and i16.
EVT OpVT = LHS.getValueType();
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
}
}
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
// Don't do this for crazy integer types.
if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
// If this is efficiently invertible, canonicalize the LHSC/RHSC values
// so that TrueC (the true value) is larger than FalseC.
bool NeedsCondInvert = false;
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
// Efficiently invertible.
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
(Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
isa<ConstantSDNode>(Cond.getOperand(1))))) {
NeedsCondInvert = true;
std::swap(TrueC, FalseC);
}
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
if (FalseC->getAPIntValue() == 0 &&
TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
}
// Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
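// E.g. TrueC = 10, FalseC = 1 gives Diff = 9, so the select becomes
// zext(cond) * 9 + 1, which fits in a single LEA.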
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
}
}
}
// Canonicalize max and min:
// (x > y) ? x : y -> (x >= y) ? x : y
// (x < y) ? x : y -> (x <= y) ? x : y
// This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
// the need for an extra compare against zero. e.g.
// (x - y) > 0 ? (x - y) : 0  ->  (x - y) >= 0 ? (x - y) : 0
// subl %esi, %edi
// testl %edi, %edi
// movl $0, %eax
// cmovgl %edi, %eax
// =>
// xorl %eax, %eax
// subl %esi, %edi
// cmovsl %eax, %edi
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGT: {
ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
Cond.getOperand(0), Cond.getOperand(1), NewCC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
}
}
}
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on
// the left side, invert the predicate to simplify the logic below.
SDValue Other;
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
Other = RHS;
CC = ISD::getSetCCInverse(CC, true);
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
Other = LHS;
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
// If the RHS is a constant we have to reverse the const canonicalization.
// x > C-1 ? x+(-C) : 0 --> subus x, C
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
if (CondRHS.getConstantOperandVal(0) == -A-1)
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(-A, VT));
}
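// E.g. with a splat constant C = 64 this rewrites
//   x > 63 ? x + (-64) : 0   into   subus x, 64.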
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
// FIXME: Would it be better to use ComputeMaskedBits to determine whether
// it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
isSplatVector(OpRHS.getNode())) {
APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
if (A.isSignBit())
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
}
}
}
// Try to match a min/max vector operation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
std::pair<unsigned, bool> ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
unsigned Opc = ret.first;
bool NeedSplit = ret.second;
if (Opc && NeedSplit) {
unsigned NumElems = VT.getVectorNumElements();
// Extract the LHS vectors
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
// Extract the RHS vectors
SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
// Create min/max for each subvector
LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
// Merge the result
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
} else if (Opc)
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted
TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
EVT IntVT = Cond.getValueType();
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
if (!TValIsAllOnes && !FValIsAllZeros) {
// Try to invert the condition if the true value is not all 1s and the false value
// is not all 0s.
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
if (TValIsAllZeros || FValIsAllOnes) {
SDValue CC = Cond.getOperand(2);
ISD::CondCode NewCC =
ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
Cond.getOperand(0).getValueType().isInteger());
Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
std::swap(LHS, RHS);
TValIsAllOnes = FValIsAllOnes;
FValIsAllZeros = TValIsAllZeros;
}
}
if (TValIsAllOnes || FValIsAllZeros) {
SDValue Ret;
if (TValIsAllOnes && FValIsAllZeros)
Ret = Cond;
else if (TValIsAllOnes)
Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond,
DAG.getNode(ISD::BITCAST, DL, IntVT, RHS));
else if (FValIsAllZeros)
Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond,
DAG.getNode(ISD::BITCAST, DL, IntVT, LHS));
return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
}
}
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
// to simplify previous instructions.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
return SDValue();
// Check all uses of the condition operand to see whether it will be
// consumed by non-BLEND instructions, which may depend on all bits being
// set properly.
for (SDNode::use_iterator I = Cond->use_begin(),
E = Cond->use_end(); I != E; ++I)
if (I->getOpcode() != ISD::VSELECT)
// TODO: Add other opcodes eventually lowered into BLEND.
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
DCI.CommitTargetLoweringOpt(TLO);
}
return SDValue();
}
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
//
// Simplify the following patterns:
// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
// to (Op EFLAGS Cond)
//
// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
// to (Op EFLAGS !Cond)
//
// where Op could be BRCOND or CMOV.
//
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// Quit unless this is a CMP, or a SUB whose value result is unused.
if (Cmp.getOpcode() != X86ISD::CMP &&
(Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
return SDValue();
// Quit if not used as a boolean value.
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
// Check CMP operands. One of them should be 0 or 1 and the other should be
// a SetCC or a value extended from it.
SDValue Op1 = Cmp.getOperand(0);
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
const ConstantSDNode* C = 0;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
else if ((C = dyn_cast<ConstantSDNode>(Op2)))
SetCC = Op1;
else // Quit if neither operand is a constant.
return SDValue();
if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
checkAgainstTrue = true;
} else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 nor 1.
return SDValue();
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
ConstantSDNode *CS;
if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
CS->getZExtValue() == 1)
OpIdx = 1;
if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
CS->getZExtValue() == 1)
OpIdx = 0;
if (OpIdx == -1)
break;
SetCC = SetCC.getOperand(OpIdx);
truncatedToBoolWithAnd = true;
} else
SetCC = SetCC.getOperand(0);
}
switch (SetCC.getOpcode()) {
case X86ISD::SETCC_CARRY:
// Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
// simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
// i.e. it's a comparison against true but the result of SETCC_CARRY is not
// truncated to i1 using 'and'.
if (checkAgainstTrue && !truncatedToBoolWithAnd)
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
// FALL THROUGH
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(1);
case X86ISD::CMOV: {
// Check whether false/true value has canonical one, i.e. 0 or 1.
ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
// Quit if true value is not a constant.
if (!TVal)
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
// Skip 'zext' or 'trunc' node.
if (Op.getOpcode() == ISD::ZERO_EXTEND ||
Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
// A special case for rdrand/rdseed, where 0 is set if false cond is
// found.
if ((Op.getOpcode() != X86ISD::RDRAND &&
Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
bool FValIsFalse = true;
if (FVal && FVal->getZExtValue() != 0) {
if (FVal->getZExtValue() != 1)
return SDValue();
// If FVal is 1, opposite cond is needed.
needOppositeCond = !needOppositeCond;
FValIsFalse = false;
}
// Quit if TVal is not the constant opposite of FVal.
if (FValIsFalse && TVal->getZExtValue() != 1)
return SDValue();
if (!FValIsFalse && TVal->getZExtValue() != 0)
return SDValue();
CC = X86::CondCode(SetCC.getConstantOperandVal(2));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(3);
}
}
return SDValue();
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
// If the flag operand isn't dead, don't touch this CMOV.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
SDValue FalseOp = N->getOperand(0);
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
case X86ISD::BSR:
case X86ISD::BSF:
// If the operand of BSR / BSF is proven never zero, then ZF cannot be set.
if (DAG.isKnownNeverZero(Cond.getOperand(0)))
return (CC == X86::COND_E) ? FalseOp : TrueOp;
}
}
SDValue Flags;
Flags = checkBoolTestSetCCCombine(Cond, CC);
if (Flags.getNode() &&
// Extra check as FCMOV only supports a subset of X86 cond.
(FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
SDValue Ops[] = { FalseOp, TrueOp,
DAG.getConstant(CC, MVT::i8), Flags };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
Ops, array_lengthof(Ops));
}
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
std::swap(TrueOp, FalseOp);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
}
}
}
// Handle these cases:
// (select (x != c), e, c) -> (select (x != c), e, x),
// (select (x == c), c, e) -> (select (x == c), x, e)
// where c is an integer constant, and the "select" is the combination
// of CMOV and CMP.
//
// The rationale for this change is that the conditional-move from a constant
// needs two instructions, however, conditional-move from a register needs
// only one instruction.
//
// CAVEAT: By replacing a constant with a symbolic value, it may obscure
// some instruction-combining opportunities. This opt needs to be
// postponed as late as possible.
//
if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
// The DCI.xxxx conditions are provided to postpone the optimization as
// late as possible.
ConstantSDNode *CmpAgainst = 0;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
!isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueOp, FalseOp);
}
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = { FalseOp, Cond.getOperand(0),
DAG.getConstant(CC, MVT::i8), Cond };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops,
array_lengthof(Ops));
}
}
}
return SDValue();
}
/// PerformMulCombine - Optimize a single multiply by a constant into two
/// multiplies in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
uint64_t MulAmt = C->getZExtValue();
if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
return SDValue();
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
if ((MulAmt % 9) == 0) {
MulAmt1 = 9;
MulAmt2 = MulAmt / 9;
} else if ((MulAmt % 5) == 0) {
MulAmt1 = 5;
MulAmt2 = MulAmt / 5;
} else if ((MulAmt % 3) == 0) {
MulAmt1 = 3;
MulAmt2 = MulAmt / 3;
}
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
SDLoc DL(N);
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
// If second multiplier is pow2, issue it first. We want the multiply by
// 3, 5, or 9 to be folded into the addressing mode unless the lone use
// is an add.
std::swap(MulAmt1, MulAmt2);
SDValue NewMul;
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(MulAmt1, VT));
if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, VT));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
}
return SDValue();
}
static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zeros or all ones.
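// Illustrative instance (added): with c1 == 1 and c2 == 3,
// (shl (and setcc_c, 1), 3) becomes (and setcc_c, 8).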
if (VT.isInteger() && !VT.isVector() &&
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
((N00.getOpcode() == ISD::ANY_EXTEND ||
N00.getOpcode() == ISD::ZERO_EXTEND) &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
APInt ShAmt = N1C->getAPIntValue();
Mask = Mask.shl(ShAmt);
if (Mask != 0)
return DAG.getNode(ISD::AND, SDLoc(N), VT,
N00, DAG.getConstant(Mask, VT));
}
}
// Hardware support for vector shifts is sparse, which makes us scalarize the
// vector operations in many cases. Also, on Sandy Bridge ADD is faster than
// SHL.
// (shl V, 1) -> add V,V
if (isSplatVector(N1.getNode())) {
assert(N0.getValueType().isVector() && "Invalid vector shift type");
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
if (N1C && (1 == N1C->getZExtValue())) {
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
}
return SDValue();
}
/// \brief Returns a vector of 0s if the input node is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
(!Subtarget->hasInt256() ||
(VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
SDValue Amt = N->getOperand(1);
SDLoc DL(N);
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
APInt ShiftAmt = C->getAPIntValue();
unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
// if the shift amount is bigger than or equal to
// the element size. The constant shift amount will be
// encoded as an 8-bit immediate.
if (ShiftAmt.trunc(8).uge(MaxAmount))
return getZeroVector(VT, Subtarget, DAG, DL);
}
}
return SDValue();
}
/// PerformShiftCombine - Combine shifts.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (N->getOpcode() == ISD::SHL) {
SDValue V = PerformSHLCombine(N, DAG);
if (V.getNode()) return V;
}
if (N->getOpcode() != ISD::SRA) {
// Try to fold this logical shift into a zero vector.
SDValue V = performShiftToAllZeros(N, DAG, Subtarget);
if (V.getNode()) return V;
}
return SDValue();
}
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
// and friends. Likewise for OR -> CMPNEQSS.
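// Illustrative pattern (comment added): for f32 operands a and b,
// (and (sete (cmp a, b)), (setnp (cmp a, b))) collapses to testing
// bit 0 of CMPEQSS a, b.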
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
unsigned opcode;
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
SDValue CMP1 = N1->getOperand(1);
SDLoc DL(N);
// The SETCCs should both refer to the same CMP.
if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
case ISD::SELECT:
ExpectingFlags = true;
break;
case ISD::CopyToReg:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
}
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
cc0 = cc1;
cc1 = tmp;
}
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
X86ISD::NodeType NTOperator = is64BitFP ?
X86ISD::FSETCCsd : X86ISD::FSETCCss;
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, CMP00.getValueType(),
+ CMP00, CMP01,
DAG.getConstant(x86cc, MVT::i8));
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
- OnesOrZeroesF);
- SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
- DAG.getConstant(1, MVT::i32));
+
+ MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
+
+ if (is64BitFP && !Subtarget->is64Bit()) {
+ // On a 32-bit target, we cannot bitcast the 64-bit float to a
+ // 64-bit integer, since that's not a legal type. Since
+ // OnesOrZeroesF is all ones or all zeroes, we don't need all the
+ // bits, but can do this little dance to extract the lowest 32 bits
+ // and work with those going forward.
+ SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
+ OnesOrZeroesF);
+ SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
+ Vector64);
+ OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
+ Vector32, DAG.getIntPtrConstant(0));
+ IntVT = MVT::i32;
+ }
+
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
+ DAG.getConstant(1, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
}
}
}
return SDValue();
}
/// CanFoldXORWithAllOnes - Test whether the XOR operand is an AllOnes vector
/// so it can be folded inside ANDNP.
static bool CanFoldXORWithAllOnes(const SDNode *N) {
EVT VT = N->getValueType(0);
// Match direct AllOnes for 128 and 256-bit vectors
if (ISD::isBuildVectorAllOnes(N))
return true;
// Look through a bit convert.
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
// Sometimes the operand may come from an insert_subvector building a 256-bit
// allones vector
if (VT.is256BitVector() &&
N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
V1.getOperand(0).getOpcode() == ISD::UNDEF &&
ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
ISD::isBuildVectorAllOnes(V2.getNode()))
return true;
}
return false;
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
// some of the transition sequences.
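// Illustrative rewrite (added): (zext (and (trunc X), (trunc Y))) becomes
// the wide (and X, Y) followed by an AND mask that clears the bits above
// the narrow element width (see the ZERO_EXTEND case below).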
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.is256BitVector())
return SDValue();
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow->getValueType(0);
if (!NarrowVT.is128BitVector())
return SDValue();
if (Narrow->getOpcode() != ISD::XOR &&
Narrow->getOpcode() != ISD::AND &&
Narrow->getOpcode() != ISD::OR)
return SDValue();
SDValue N0 = Narrow->getOperand(0);
SDValue N1 = Narrow->getOperand(1);
SDLoc DL(Narrow);
// The Left side has to be a trunc.
if (N0.getOpcode() != ISD::TRUNCATE)
return SDValue();
// The type of the truncated inputs.
EVT WideVT = N0->getOperand(0)->getValueType(0);
if (WideVT != VT)
return SDValue();
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
bool RHSConst = (isSplatVector(N1.getNode()) &&
isa<ConstantSDNode>(N1->getOperand(0)));
if (!RHSTrunc && !RHSConst)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
return SDValue();
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConst) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
N1->getOperand(0));
SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
} else if (RHSTrunc) {
N1 = N1->getOperand(0);
}
// Generate the wide operation.
SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
unsigned Opcode = N->getOpcode();
switch (Opcode) {
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND: {
unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
APInt Mask = APInt::getAllOnesValue(InBits);
Mask = Mask.zext(VT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
Op, DAG.getConstant(Mask, VT));
}
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
Op, DAG.getValueType(NarrowVT));
default:
llvm_unreachable("Unexpected opcode");
}
}
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
// Create BLSI, BLSR, and BZHI instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
// BZHI is X & ((1 << Y) - 1)
// BEXTR is ((X >> imm) & (2**size-1))
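// Illustrative BEXTR encoding (added): for (and (srl x, 8), 0xff),
// Shift == 8 and MaskSize == 8, giving the control value
// Shift | (MaskSize << 8) == 0x0808.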
if (VT == MVT::i32 || VT == MVT::i64) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (Subtarget->hasBMI()) {
// Check LHS for neg
if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
isZero(N0.getOperand(0)))
return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
// Check RHS for neg
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
isZero(N1.getOperand(0)))
return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
// Check LHS for X-1
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
isAllOnes(N0.getOperand(1)))
return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
// Check RHS for X-1
if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
isAllOnes(N1.getOperand(1)))
return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
}
if (Subtarget->hasBMI2()) {
// Check for (and (add (shl 1, Y), -1), X)
if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::SHL) {
SDValue N001 = N00.getOperand(1);
assert(N001.getValueType() == MVT::i8 && "unexpected type");
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
if (C && C->getZExtValue() == 1)
return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001);
}
}
// Check for (and X, (add (shl 1, Y), -1))
if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == ISD::SHL) {
SDValue N101 = N10.getOperand(1);
assert(N101.getValueType() == MVT::i8 && "unexpected type");
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
if (C && C->getZExtValue() == 1)
return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101);
}
}
}
// Check for BEXTR.
if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
(N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (MaskNode && ShiftNode) {
uint64_t Mask = MaskNode->getZExtValue();
uint64_t Shift = ShiftNode->getZExtValue();
if (isMask_64(Mask)) {
uint64_t MaskSize = CountPopulation_64(Mask);
if (Shift + MaskSize <= VT.getSizeInBits())
return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
DAG.getConstant(Shift | (MaskSize << 8), VT));
}
}
} // BEXTR
return SDValue();
}
// Want to form ANDNP nodes:
// 1) In the hopes of then easily combining them with OR and AND nodes
// to form PBLEND/PSIGN.
// 2) To match ANDN packed intrinsics
if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
//ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
//ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
if (!Subtarget->hasSSSE3() ||
(VT == MVT::v4i64 && !Subtarget->hasInt256()))
return SDValue();
// Canonicalize pandn to RHS
if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1);
// or (and (m, y), (pandn m, x))
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
if (N0.getOperand(0) == Mask)
Y = N0.getOperand(1);
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
// Check to see if the mask appeared in both the AND and ANDNP.
if (!Y.getNode())
return SDValue();
// Validate that X, Y, and Mask are bitcasts, and see through them.
// Look through mask bitcast.
if (Mask.getOpcode() == ISD::BITCAST)
Mask = Mask.getOperand(0);
if (X.getOpcode() == ISD::BITCAST)
X = X.getOperand(0);
if (Y.getOpcode() == ISD::BITCAST)
Y = Y.getOperand(0);
EVT MaskVT = Mask.getValueType();
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
unsigned SraAmt = ~0;
if (Mask.getOpcode() == ISD::SRA) {
SDValue Amt = Mask.getOperand(1);
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
SraAmt = C->getZExtValue();
}
} else if (Mask.getOpcode() == X86ISD::VSRAI) {
SDValue SraC = Mask.getOperand(1);
SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
}
if ((SraAmt + 1) != EltBits)
return SDValue();
SDLoc DL(N);
// Now we know we at least have a pblendvb with the mask val. See if
// we can form a psignb/w/d.
// psign = x.type == y.type == mask.type && y = sub(0, x);
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
// PBLENDVB only available on SSE 4.1
if (!Subtarget->hasSSE41())
return SDValue();
EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
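// Note (added for clarity): when the SHL amount is itself a (sub Bits, c)
// node, the code below flips the opcode to X86ISD::SHRD and swaps the
// operands to match the mirrored pattern.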
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue ShAmt0 = N0.getOperand(1);
if (ShAmt0.getValueType() != MVT::i8)
return SDValue();
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
ShAmt1 = ShAmt1.getOperand(0);
SDLoc DL(N);
unsigned Opc = X86ISD::SHLD;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
if (ShAmt0.getOpcode() == ISD::SUB) {
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
}
unsigned Bits = VT.getSizeInBits();
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C &&
ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
return SDValue();
}
// Generate NEG and CMOV for integer abs.
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Since X86 does not have CMOV for 8-bit integers, we don't convert
// 8-bit integer abs to NEG and CMOV.
if (VT.isInteger() && VT.getSizeInBits() == 8)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
// Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
// and change it to SUB and CMOV.
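// Worked example (added): for i32, the canonical branchless abs
//   y = x >> 31; r = (x + y) ^ y;
// matches this pattern and is re-emitted as 0-x plus a CMOVGE that
// selects between x and its negation.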
if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
N0.getOpcode() == ISD::ADD &&
N0.getOperand(1) == N1 &&
N1.getOpcode() == ISD::SRA &&
N1.getOperand(0) == N0.getOperand(0))
if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
// Generate SUB & CMOV.
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, VT), N0.getOperand(0));
SDValue Ops[] = { N0.getOperand(0), Neg,
DAG.getConstant(X86::COND_GE, MVT::i8),
SDValue(Neg.getNode(), 1) };
return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
Ops, array_lengthof(Ops));
}
return SDValue();
}
// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (Subtarget->hasCMov()) {
SDValue RV = performIntegerAbsCombine(N, DAG);
if (RV.getNode())
return RV;
}
// Try forming BMI if it is available.
if (!Subtarget->hasBMI())
return SDValue();
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
assert(Subtarget->hasBMI() && "Creating BLSMSK requires BMI instructions");
// Create BLSMSK instructions by finding X ^ (X-1)
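// Worked example (added): X = 0b10100 gives X-1 = 0b10011 and
// X ^ (X-1) = 0b00111, a mask up to and including the lowest set bit.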
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
isAllOnes(N0.getOperand(1)))
return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1);
if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
isAllOnes(N1.getOperand(1)))
return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0);
return SDValue();
}
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned RegSz = RegVT.getSizeInBits();
// On Sandy Bridge, unaligned 256-bit loads are inefficient.
ISD::LoadExtType Ext = Ld->getExtensionType();
unsigned Alignment = Ld->getAlignment();
bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
!DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Ptr = Ld->getBasePtr();
SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems/2);
SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
std::min(16U, Alignment));
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
SDValue NewVec = DAG.getUNDEF(RegVT);
NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
return DCI.CombineTo(N, NewVec, TF, true);
}
// If this is a vector EXT Load then attempt to optimize it using a
// shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
// expansion is still better than scalar code.
// We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
// emit a shuffle and an arithmetic shift.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
(Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
return SDValue();
// All sizes must be a power of two.
if (!isPowerOf2_32(RegSz * MemSz * NumElems))
return SDValue();
// Attempt to load the original value using scalar loads.
// Find the largest scalar type that divides the total loaded size.
MVT SclrLoadTy = MVT::i8;
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
MVT Tp = (MVT::SimpleValueType)tp;
if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
SclrLoadTy = Tp;
}
}
// On 32-bit systems, we can't save 64-bit integers. Try bitcasting to f64.
if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
(64 <= MemSz))
SclrLoadTy = MVT::f64;
// Calculate the number of scalar loads that we need to perform
// in order to load our vector from memory.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
if (Ext == ISD::SEXTLOAD && NumLoads > 1)
return SDValue();
unsigned loadRegZize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz == 256)
loadRegZize /= 2;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
loadRegZize/SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we "widen" MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegZize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
// We can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT))
return SDValue();
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
TLI.getPointerTy());
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
// Perform a single load.
SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
Ptr, Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
Chains.push_back(ScalarLoad.getValue(1));
// Create the first element type using SCALAR_TO_VECTOR in order to avoid
// another round of DAGCombining.
if (i == 0)
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
else
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
ScalarLoad, DAG.getIntPtrConstant(i));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
Chains.size());
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz/MemSz;
if (Ext == ISD::SEXTLOAD) {
// If we have SSE4.1 we can directly emit a VSEXT node.
if (Subtarget->hasSSE41()) {
SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
return DCI.CombineTo(N, Sext, TF, true);
}
// Otherwise we'll shuffle the small elements in the high bits of the
// larger type and perform an arithmetic shift. If the shift is not legal
// it's better to scalarize.
if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
return SDValue();
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
DAG.getUNDEF(WideVecVT),
&ShuffleVec[0]);
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
DAG.getConstant(Amt, RegVT));
return DCI.CombineTo(N, Shuff, TF, true);
}
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i*SizeRatio] = i;
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
DAG.getUNDEF(WideVecVT),
&ShuffleVec[0]);
// Bitcast to the requested type.
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
// Replace the original load with the new sequence
// and return the new chain.
return DCI.CombineTo(N, Shuff, TF, true);
}
return SDValue();
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we are saving a concatenation of two XMM registers, perform two stores.
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
unsigned Alignment = St->getAlignment();
bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
StVT == VT && !IsAligned) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(),
std::min(16U, Alignment));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
if (St->isTruncatingStore() && VT.isVector()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
// From, To sizes and ElemCount must be pow of two
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
if (0 != (NumElems * FromSz) % ToSz) return SDValue();
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT))
return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
&ShuffleVec[0]);
// At this point all of the data is stored at the bottom of the
// register. We now need to save it to mem.
// Find the largest store unit
MVT StoreType = MVT::i8;
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
MVT Tp = (MVT::SimpleValueType)tp;
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
StoreType = Tp;
}
// On 32-bit systems, we can't save 64-bit integers. Try bitcasting to f64.
if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
(64 <= NumElems * ToSz))
StoreType = MVT::f64;
// Bitcast the original vector into a vector of store-size units
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
TLI.getPointerTy());
SDValue Ptr = St->getBasePtr();
// Perform one or more big stores into memory.
for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
StoreType, ShuffWide,
DAG.getIntPtrConstant(i));
SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
Chains.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
Chains.size());
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
SDNode* LdVal = St->getValue().getNode();
LoadSDNode *Ld = 0;
int TokenFactorIndex = -1;
SmallVector<SDValue, 8> Ops;
SDNode* ChainVal = St->getChain().getNode();
// Must be a store of a load. We currently handle two cases: the load
// is a direct child, and it's under an intervening TokenFactor. It is
// possible to dig deeper under nested TokenFactors.
if (ChainVal == LdVal)
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
} else
Ops.push_back(ChainVal->getOperand(i));
}
}
if (!Ld || !ISD::isNormalLoad(Ld))
return SDValue();
// If this is not the MMX case, i.e. we are just turning i64 load/store
// into f64 load/store, avoid the transformation if there are multiple
// uses of the loaded value.
if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
return SDValue();
SDLoc LdDL(Ld);
SDLoc StDL(N);
// If we are a 64-bit capable x86, lower to a single movq load/store pair.
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
Ops.size());
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
}
// Otherwise, lower to two pairs of 32-bit loads / stores.
SDValue LoAddr = Ld->getBasePtr();
SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
DAG.getConstant(4, MVT::i32));
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(),
MinAlign(Ld->getAlignment(), 4));
SDValue NewChain = LoLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(LoLd);
Ops.push_back(HiLd);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
Ops.size());
}
LoAddr = St->getBasePtr();
HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
DAG.getConstant(4, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
St->getPointerInfo().getWithOffset(4),
St->isVolatile(),
St->isNonTemporal(),
MinAlign(St->getAlignment(), 4));
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
return SDValue();
}
/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
/// and return the operands for the horizontal operation in LHS and RHS. A
/// horizontal operation performs the binary operation on successive elements
/// of its first operand, then on successive elements of its second operand,
/// returning the resulting values in a vector. For example, if
/// A = < float a0, float a1, float a2, float a3 >
/// and
/// B = < float b0, float b1, float b2, float b3 >
/// then the result of doing a horizontal operation on A and B is
/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
// LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
// RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
// then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
// which is A horizontal-op B.
// At least one of the operands should be a vector shuffle.
if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
// Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
// operate independently on 128-bit lanes.
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts / NumLanes;
assert((NumLaneElts % 2 == 0) &&
"Vector type should have an even number of elements in each lane");
unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
// If LHS is not a shuffle then pretend it is the shuffle
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
// NOTE: in what follows a default initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
// Check that the shuffles are both shuffling the same vectors.
if (!(A == C && B == D) && !(A == D && B == C))
return false;
// If everything is UNDEF then bail out: it would be better to fold to UNDEF.
if (!A.getNode() && !B.getNode())
return false;
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
CommuteVectorShuffleMask(RMask, NumElts);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
int LIdx = LMask[i+l], RIdx = RMask[i+l];
// Ignore any UNDEF components.
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
}
LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
return true;
}
/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, LHS, RHS);
return SDValue();
}
/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, LHS, RHS);
return SDValue();
}
/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
// F[X]OR(x, 0.0) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
return SDValue();
}
/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
/// X86ISD::FMAX nodes.
static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
// Only perform optimizations if UnsafeMath is used.
if (!DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
// If we run in unsafe-math mode, then convert the FMIN and FMAX nodes
// into FMINC and FMAXC, which are commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unknown opcode");
case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
}
return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1));
}
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
// FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
return SDValue();
}
/// PerformFANDNCombine - Do target-specific dag combines on X86ISD::FANDN nodes
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
// FANDN(x, 0.0) -> 0.0
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
return SDValue();
}
static SDValue PerformBTCombine(SDNode *N,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
SDValue Op1 = N->getOperand(1);
if (Op1.hasOneUse()) {
unsigned BitWidth = Op1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
DCI.CommitTargetLoweringOpt(TLO);
}
return SDValue();
}
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
return SDValue();
}
static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
SDLoc dl(N);
// The SIGN_EXTEND_INREG to v4i64 is an expensive operation on both
// SSE and AVX2 since there is no sign-extended shift right
// operation on a vector with 64-bit elements.
// (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
//   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)) {
SDValue N00 = N0.getOperand(0);
// EXTLOAD has a better solution on AVX2,
// it may be replaced with X86ISD::VSEXT node.
if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
if (!ISD::isNormalLoad(N00.getNode()))
return SDValue();
if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
N00, N1);
return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
}
}
return SDValue();
}
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget->hasFp256())
return SDValue();
EVT VT = N->getValueType(0);
if (VT.isVector() && VT.getSizeInBits() == 256) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
}
return SDValue();
}
static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget* Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
EVT ScalarVT = VT.getScalarType();
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
(!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
return SDValue();
SDValue A = N->getOperand(0);
SDValue B = N->getOperand(1);
SDValue C = N->getOperand(2);
bool NegA = (A.getOpcode() == ISD::FNEG);
bool NegB = (B.getOpcode() == ISD::FNEG);
bool NegC = (C.getOpcode() == ISD::FNEG);
// Negative multiplication when NegA xor NegB
bool NegMul = (NegA != NegB);
if (NegA)
A = A.getOperand(0);
if (NegB)
B = B.getOperand(0);
if (NegC)
C = C.getOperand(0);
unsigned Opcode;
if (!NegMul)
Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
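// For instance (illustrative): fma(-a, b, c) selects FNMADD above, while
// fma(a, b, -c) selects FMSUB.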
return DAG.getNode(Opcode, dl, VT, A, B, C);
}
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
// ISD::SETCC is always legalized to i8.
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 1)
return SDValue();
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, VT));
}
}
if (VT.is256BitVector()) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
}
return SDValue();
}
// Optimize x == -y --> x+y == 0
// x != -y --> x+y != 0
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
LHS.getValueType(), RHS, LHS.getOperand(1));
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
addV, DAG.getConstant(0, addV.getValueType()), CC);
}
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
RHS.getValueType(), LHS, RHS.getOperand(1));
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
addV, DAG.getConstant(0, addV.getValueType()), CC);
}
return SDValue();
}
// Helper function of PerformSETCCCombine. It materializes "setb reg"
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
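// Illustrative effect (added): when CF is set, SETCC_CARRY yields all-ones
// (-1); the AND with 1 extracts the 0/1 "setb" value, while the all-ones
// intermediate extends to wider types without a zext.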
static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
DAG.getConstant(1, MVT::i8));
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
return MaterializeSETB(DL, NewEFLAGS, DAG);
}
}
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
if (CC == X86::COND_B)
return MaterializeSETB(DL, EFLAGS, DAG);
SDValue Flags;
Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
if (Flags.getNode()) {
SDValue Cond = DAG.getConstant(CC, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
return SDValue();
}
// Optimize branch condition evaluation.
//
static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
SDValue Flags;
Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
if (Flags.getNode()) {
SDValue Cond = DAG.getConstant(CC, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
Flags);
}
return SDValue();
}
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
EVT InVT = Op0->getValueType(0);
// SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
SDLoc dl(N);
MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
}
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT VT = Ld->getValueType(0);
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
VT == MVT::i64) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
}
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
if (X86::isZeroNode(N->getOperand(0)) &&
X86::isZeroNode(N->getOperand(1)) &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B,MVT::i8),
N->getOperand(2)),
DAG.getConstant(1, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
return SDValue();
}
// fold (add Y, (sete X, 0)) -> adc 0, Y
// (add Y, (setne X, 0)) -> sbb -1, Y
// (sub (sete X, 0), Y) -> sbb 0, Y
// (sub (setne X, 0), Y) -> adc -1, Y
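// Note (added for clarity): the CMP against 1 built below sets the carry
// flag exactly when X == 0, so the ADC/SBB consumes the sete/setne result
// directly from EFLAGS.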
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// Look through ZExts.
SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
return SDValue();
SDValue SetCC = Ext.getOperand(0);
if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
return SDValue();
X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
SDValue Cmp = SetCC.getOperand(1);
if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
!X86::isZeroNode(Cmp.getOperand(1)) ||
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
SDValue CmpOp0 = Cmp.getOperand(0);
SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
DAG.getConstant(1, CmpOp0.getValueType()));
SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
if (CC == X86::COND_NE)
return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
DL, OtherVal.getValueType(), OtherVal,
DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
DL, OtherVal.getValueType(), OtherVal,
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
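// Illustrative sketch (assumed C source, not from this file):
//   int f(int x, int y) { return y + (x == 0); }
// becomes (adc y, 0, (cmp x, 1)) after this combine, i.e. roughly
//   cmpl $1, %edi    # CF is set iff x == 0 (unsigned x < 1)
//   adcl $0, %esi    # y += carry
// instead of a sete/movzbl/add sequence.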
/// PerformAddCombine - Do target-specific dag combines on integer adds.
static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
}
static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// X86 can't encode an immediate LHS of a sub. See if we can push the
// negation into a preceding instruction.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
// If the RHS of the sub is an XOR with one use and a constant, invert the
// immediate. Then add one to the LHS of the sub so we can turn
// X-Y -> X+~Y+1, saving one register.
if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
isa<ConstantSDNode>(Op1.getOperand(1))) {
APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
Op1.getOperand(0),
DAG.getConstant(~XorC, VT));
return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor,
DAG.getConstant(C->getAPIntValue()+1, VT));
}
}
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
}
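// Illustrative sketch: because -(Y ^ C) == (Y ^ ~C) + 1, a sub such as
//   (sub 5, (xor Y, 3))
// is rewritten by the combine above to
//   (add (xor Y, ~3), 6)
// which no longer needs an immediate in the (unencodable) LHS position.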
/// performVZEXTCombine - Performs VZEXT DAG combines.
static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
// (vzext (bitcast (vzext x))) -> (vzext x)
SDValue In = N->getOperand(0);
while (In.getOpcode() == ISD::BITCAST)
In = In.getOperand(0);
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
return DAG.getNode(X86ISD::VZEXT, SDLoc(N), N->getValueType(0),
In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
}
return SDValue();
}
/// isTypeDesirableForOp - Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
if (VT != MVT::i16)
return true;
switch (Opc) {
default:
return true;
case ISD::LOAD:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return false;
}
}
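// Illustrative sketch (encodings shown for illustration): 16-bit ALU ops
// carry the 0x66 operand-size prefix, e.g.
//   66 83 c0 01   addw $1, %ax     (4 bytes)
//   83 c0 01      addl $1, %eax    (3 bytes)
// so the opcodes above report i16 as undesirable and are promoted to i32 by
// IsDesirableToPromoteOp below.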
/// IsDesirableToPromoteOp - This method queries the target whether it is
/// beneficial for the dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
if (VT != MVT::i16)
return false;
bool Promote = false;
bool Commute = false;
switch (Op.getOpcode()) {
default: break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
// If the non-extending load has a single use and it's not live out, then it
// might be folded.
if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
Op.hasOneUse()*/) {
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI) {
// The only case where we'd want to promote LOAD (rather than it being
// promoted as an operand) is when its only use is liveout.
if (UI->getOpcode() != ISD::CopyToReg)
return false;
}
}
Promote = true;
break;
}
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Promote = true;
break;
case ISD::SHL:
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
return false;
Promote = true;
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
Commute = true;
// fallthrough
case ISD::SUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
if (!Commute && MayFoldLoad(N1))
return false;
// Avoid disabling potential load folding opportunities.
if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
return false;
if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
return false;
Promote = true;
}
}
PVT = MVT::i32;
return Promote;
}
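// Illustrative sketch (the exact rewrite is up to the generic dag combiner):
// with PVT == MVT::i32 an i16 op is promoted roughly as
//   (i16 (add x, y)) -> (trunc (i32 (add (anyext x), (anyext y))))
// the truncate is free on x86 (just use the 16-bit subregister), while the
// 32-bit add avoids the 0x66 prefix and partial-register updates.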
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
namespace {
// Helper to match a string against a sequence of whitespace-separated pieces.
bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
for (unsigned i = 0, e = args.size(); i != e; ++i) {
StringRef piece(*args[i]);
if (!s.startswith(piece)) // Check if the piece matches.
return false;
s = s.substr(piece.size());
StringRef::size_type pos = s.find_first_not_of(" \t");
if (pos == 0) // We matched a prefix.
return false;
s = s.substr(pos);
}
return s.empty();
}
const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
}
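// Illustrative usage sketch: pieces must be separated by at least one blank
// or tab, so
//   matchAsm("bswap   $0", "bswap", "$0")  // true
//   matchAsm("bswap$0",   "bswap", "$0")   // false ("We matched a prefix")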
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
return true;
}
}
return false;
}
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
// FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
if (matchAsm(AsmPieces[0], "bswap", "$0") ||
matchAsm(AsmPieces[0], "bswapl", "$0") ||
matchAsm(AsmPieces[0], "bswapq", "$0") ||
matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
(matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
if (CI->getType()->isIntegerTy(64)) {
InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
if (Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
matchAsm(AsmPieces[1], "bswap", "%edx") &&
matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
}
return false;
}
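// Illustrative sketch (assumed user code, not from this file): a GCC-style
//   asm("rorw $8, %w0" : "=r"(v) : "0"(v) : "cc");
// on an i16 value reaches the matchAsm("rorw", "$$8,", "${0:w}") case above
// and is replaced by a call to llvm.bswap.i16, letting the optimizer see
// through the asm block.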
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'R':
case 'q':
case 'Q':
case 'f':
case 't':
case 'u':
case 'y':
case 'x':
case 'Y':
case 'l':
return C_RegisterClass;
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
return C_Register;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'G':
case 'C':
case 'e':
case 'Z':
return C_Other;
default:
break;
}
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
case 'R':
case 'q':
case 'Q':
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_SpecificReg;
break;
case 'f':
case 't':
case 'u':
if (type->isFloatingPointTy())
weight = CW_SpecificReg;
break;
case 'y':
if (type->isX86_MMXTy() && Subtarget->hasMMX())
weight = CW_SpecificReg;
break;
case 'x':
case 'Y':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
weight = CW_Register;
break;
case 'I':
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
weight = CW_Constant;
}
break;
case 'J':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 63)
weight = CW_Constant;
}
break;
case 'K':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
weight = CW_Constant;
}
break;
case 'L':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
weight = CW_Constant;
}
break;
case 'M':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 3)
weight = CW_Constant;
}
break;
case 'N':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xff)
weight = CW_Constant;
}
break;
case 'G':
case 'C':
if (dyn_cast<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
case 'e':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80000000LL) &&
(C->getSExtValue() <= 0x7fffffffLL))
weight = CW_Constant;
}
break;
case 'Z':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xffffffff)
weight = CW_Constant;
}
break;
}
return weight;
}
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
if (Subtarget->hasSSE2())
return "Y";
if (Subtarget->hasSSE1())
return "x";
}
return TargetLowering::LowerXConstraint(ConstraintVT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
// Only support length 1 constraints for now.
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
case 'J':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
case 'e': {
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
break;
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
}
return;
}
case 'Z': {
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
return;
}
case 'i': {
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
break;
}
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
// be used as immediates.
if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = 0;
int64_t Offset = 0;
// Match either (GA), (GA+C), (GA+C1+C2), etc.
while (1) {
if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
Offset += GA->getOffset();
break;
} else if (Op.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
} else if (Op.getOpcode() == ISD::SUB) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += -C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
}
// Otherwise, this isn't something we can handle, reject it.
return;
}
const GlobalValue *GV = GA->getGlobal();
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
getTargetMachine())))
return;
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
GA->getValueType(0), Offset);
break;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
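// Illustrative sketch: for asm("int $0" :: "N"(0x80)) the 'N' case above
// accepts 0x80 (<= 255) and pushes a target constant into Ops; a value such
// as 300 falls through to the early return, Ops stays empty, and the caller
// rejects the operand as invalid for the constraint.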
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
// TODO: Slight differences here in allocation order and leaving
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget->is64Bit()) {
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i64 || VT == MVT::f64)
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32_ABCDRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_ABCDRegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i64)
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
return std::make_pair(0U, &X86::GR32RegClass);
return std::make_pair(0U, &X86::GR64RegClass);
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
if (VT == MVT::i32 || !Subtarget->is64Bit())
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP64RegClass);
return std::make_pair(0U, &X86::RFP80RegClass);
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
if (!Subtarget->hasSSE1()) break;
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
return std::make_pair(0U, &X86::FR64RegClass);
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
return std::make_pair(0U, &X86::VR256RegClass);
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
return std::make_pair(0U, &X86::VR512RegClass);
}
break;
}
}
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Not found as a standard register?
if (Res.second == 0) {
// Map st(0) .. st(7) to the corresponding ST0 .. ST7 registers.
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' &&
tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
Constraint[5] == ')' &&
Constraint[6] == '}') {
Res.first = X86::ST0+Constraint[4]-'0';
Res.second = &X86::RFP80RegClass;
return Res;
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::ST0;
Res.second = &X86::RFP80RegClass;
return Res;
}
// flags -> EFLAGS
if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = &X86::CCRRegClass;
return Res;
}
// 'A' means EAX + EDX.
if (Constraint == "A") {
Res.first = X86::EAX;
Res.second = &X86::GR32_ADRegClass;
return Res;
}
return Res;
}
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
if (Res.second->hasType(VT))
return Res; // Correct type already, nothing to do.
// All of the single-register GCC register classes map their values onto
// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
// really want an 8-bit, 32-bit or 64-bit register, map to the appropriate
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::AL; break;
case X86::DX: DestReg = X86::DL; break;
case X86::CX: DestReg = X86::CL; break;
case X86::BX: DestReg = X86::BL; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
} else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::EAX; break;
case X86::DX: DestReg = X86::EDX; break;
case X86::CX: DestReg = X86::ECX; break;
case X86::BX: DestReg = X86::EBX; break;
case X86::SI: DestReg = X86::ESI; break;
case X86::DI: DestReg = X86::EDI; break;
case X86::BP: DestReg = X86::EBP; break;
case X86::SP: DestReg = X86::ESP; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
} else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::RAX; break;
case X86::DX: DestReg = X86::RDX; break;
case X86::CX: DestReg = X86::RCX; break;
case X86::BX: DestReg = X86::RBX; break;
case X86::SI: DestReg = X86::RSI; break;
case X86::DI: DestReg = X86::RDI; break;
case X86::BP: DestReg = X86::RBP; break;
case X86::SP: DestReg = X86::RSP; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = &X86::GR64RegClass;
}
}
} else if (Res.second == &X86::FR32RegClass ||
Res.second == &X86::FR64RegClass ||
Res.second == &X86::VR128RegClass ||
Res.second == &X86::VR256RegClass ||
Res.second == &X86::FR32XRegClass ||
Res.second == &X86::FR64XRegClass ||
Res.second == &X86::VR128XRegClass ||
Res.second == &X86::VR256XRegClass ||
Res.second == &X86::VR512RegClass) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
else if (X86::VR128RegClass.hasType(VT))
Res.second = &X86::VR128RegClass;
else if (X86::VR256RegClass.hasType(VT))
Res.second = &X86::VR256RegClass;
else if (X86::VR512RegClass.hasType(VT))
Res.second = &X86::VR512RegClass;
}
return Res;
}
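// Illustrative sketch: the remapping above means an i32 operand tied to
// "{ax}" comes back as (X86::EAX, &X86::GR32RegClass) instead of the 16-bit
// AX the generic lookup found, and "{xmm0}" with a v4f32 operand stays in
// VR128 rather than whichever class the name-based mapper picked first.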
Index: head/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86InstrCompiler.td (revision 265924)
+++ head/contrib/llvm/lib/Target/X86/X86InstrCompiler.td (revision 265925)
@@ -1,1841 +1,1842 @@
//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various pseudo instructions used by the compiler,
// as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Pattern Matching Support
def GetLo32XForm : SDNodeXForm<imm, [{
// Transformation function: get the low 32 bits.
return getI32Imm((unsigned)N->getZExtValue());
}]>;
def GetLo8XForm : SDNodeXForm<imm, [{
// Transformation function: get the low 8 bits.
return getI8Imm((uint8_t)N->getZExtValue());
}]>;
//===----------------------------------------------------------------------===//
// Random Pseudo Instructions.
// PIC base construction. This expands to code that looks like this:
// call $next_inst
// popl %destreg"
let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
"", []>;
// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
let Defs = [ESP, EFLAGS], Uses = [ESP] in {
def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
"#ADJCALLSTACKDOWN",
[(X86callseq_start timm:$amt)]>,
Requires<[In32BitMode]>;
def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
"#ADJCALLSTACKUP",
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
Requires<[In32BitMode]>;
}
// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
let Defs = [RSP, EFLAGS], Uses = [RSP] in {
def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
"#ADJCALLSTACKDOWN",
[(X86callseq_start timm:$amt)]>,
Requires<[In64BitMode]>;
def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
"#ADJCALLSTACKUP",
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
Requires<[In64BitMode]>;
}
// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [EFLAGS] in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
(ins GR8:$al,
i64imm:$regsavefi, i64imm:$offset,
variable_ops),
"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
[(X86vastart_save_xmm_regs GR8:$al,
imm:$regsavefi,
- imm:$offset)]>;
+ imm:$offset),
+ (implicit EFLAGS)]>;
// The VAARG_64 pseudo-instruction takes the address of the va_list,
// and places the address of the next argument into a register.
let Defs = [EFLAGS] in
def VAARG_64 : I<0, Pseudo,
(outs GR64:$dst),
(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
"#VAARG_64 $dst, $ap, $size, $mode, $align",
[(set GR64:$dst,
(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
(implicit EFLAGS)]>;
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
// targets. These calls are needed to probe the stack when allocating more than
// 4k bytes in one go. Touching the stack at 4K increments is necessary to
// ensure that the guard pages used by the OS virtual memory manager are
// allocated in correct sequence.
// The main point of having a separate instruction is the extra unmodelled
// effects (compared to ordinary calls) like the stack pointer change.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
"# dynamic stack allocation",
[(X86WinAlloca)]>;
// When using segmented stacks these are lowered into instructions which first
// check if the current stacklet has enough free memory. If it does, memory is
// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
"# variable sized alloca for segmented stacks",
[(set GR32:$dst,
(X86SegAlloca GR32:$size))]>,
Requires<[In32BitMode]>;
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
"# variable sized alloca for segmented stacks",
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
}
// The MSVC runtime contains an _ftol2 routine for converting floating-point
// to integer values. It has a strange calling convention: the input is
// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
// used as a temporary register. No other registers (aside from flags) are
// touched.
// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
// variant is unnecessary.
let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
"# win32 fptoui",
[(X86WinFTOL RFP32:$src)]>,
Requires<[In32BitMode]>;
def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
"# win32 fptoui",
[(X86WinFTOL RFP64:$src)]>,
Requires<[In32BitMode]>;
}
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
let SchedRW = [WriteSystem] in {
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
usesCustomInserter = 1 in {
def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
"#EH_SJLJ_SETJMP32",
[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
"#EH_SJLJ_SETJMP64",
[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
let isTerminator = 1 in {
def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
"#EH_SJLJ_LONGJMP32",
[(X86eh_sjlj_longjmp addr:$buf)]>,
Requires<[In32BitMode]>;
def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
"#EH_SJLJ_LONGJMP64",
[(X86eh_sjlj_longjmp addr:$buf)]>,
Requires<[In64BitMode]>;
}
}
} // SchedRW
let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
"#EH_SjLj_Setup\t$dst", []>;
}
//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
// This is lowered into a RET instruction by MCInstLower. We need
// this so that we don't have to have a MachineBasicBlock which ends
// with a RET and also has successors.
let isPseudo = 1 in {
def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
"", []>;
// This instruction is lowered to a RET followed by a MOV. The two
// instructions are not generated on a higher level since then the
// verifier sees a MachineBasicBlock ending with a non-terminator.
def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
"", []>;
}
//===----------------------------------------------------------------------===//
// Alias Instructions
//===----------------------------------------------------------------------===//
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// FIXME: Set encoding to pseudo.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.
def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
let AddedComplexity = 20;
}
// Materialize an i64 constant where the top 32 bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1, neverHasSideEffects = 1 in
def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
// actually the zero-extension of a 32-bit constant, and for labels in the
// x86-64 small code model.
def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
let AddedComplexity = 1 in
def : Pat<(i64 mov64imm32:$src),
(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
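// Illustrative sketch: writes to a 32-bit GPR implicitly zero bits 63:32, so
// a single "xorl %eax, %eax" (MOV32r0) or "movl $imm32, %eax" (MOV32ri64)
// serves for the 8/16/64-bit zeroes and the zero-extended i64 immediates
// above; SUBREG_TO_REG merely relabels the value as 64-bit and emits no
// instruction.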
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
// will be eliminated and that the sbb can be extended up to a wider type. When
// this happens, it is great. However, if we are left with an 8-bit sbb and an
// and, we might as well just match it as a setb.
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
// (add OP, SETB) -> (adc OP, 0)
def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
(ADC8ri GR8:$op, 0)>;
def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
(ADC32ri8 GR32:$op, 0)>;
def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
(ADC64ri8 GR64:$op, 0)>;
// (sub OP, SETB) -> (sbb OP, 0)
def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
(SBB8ri GR8:$op, 0)>;
def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
(SBB32ri8 GR32:$op, 0)>;
def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
(SBB64ri8 GR64:$op, 0)>;
// (sub OP, SETCC_CARRY) -> (adc OP, 0)
def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
(ADC8ri GR8:$op, 0)>;
def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
(ADC32ri8 GR32:$op, 0)>;
def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
(ADC64ri8 GR64:$op, 0)>;
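// Illustrative sketch (assumed C source): for
//   r = a + (b < c);   // unsigned compare
// the compare leaves CF set iff b < c, the setb is canonicalized to
// (and (setcc_c ...), 1), and the (add ..., SETB) pattern above selects
//   cmpl %ecx, %ebx
//   adcl $0, %eax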
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
Requires<[In32BitMode]>;
def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
Requires<[In32BitMode]>;
def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
[(X86rep_movs i32)], IIC_REP_MOVS>, REP,
Requires<[In32BitMode]>;
}
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
Requires<[In64BitMode]>;
def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
Requires<[In64BitMode]>;
def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
[(X86rep_movs i32)], IIC_REP_MOVS>, REP,
Requires<[In64BitMode]>;
def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
[(X86rep_movs i64)], IIC_REP_MOVS>, REP,
Requires<[In64BitMode]>;
}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
let Uses = [AL,ECX,EDI] in
def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
[(X86rep_stos i8)], IIC_REP_STOS>, REP,
Requires<[In32BitMode]>;
let Uses = [AX,ECX,EDI] in
def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
Requires<[In32BitMode]>;
let Uses = [EAX,ECX,EDI] in
def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)], IIC_REP_STOS>, REP,
Requires<[In32BitMode]>;
}
let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
let Uses = [AL,RCX,RDI] in
def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
[(X86rep_stos i8)], IIC_REP_STOS>, REP,
Requires<[In64BitMode]>;
let Uses = [AX,RCX,RDI] in
def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
Requires<[In64BitMode]>;
let Uses = [RAX,RCX,RDI] in
def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)], IIC_REP_STOS>, REP,
Requires<[In64BitMode]>;
let Uses = [RAX,RCX,RDI] in
def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
Requires<[In64BitMode]>;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_base_addr32",
[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
Requires<[In32BitMode]>;
}
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_base_addr64",
[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
Requires<[In64BitMode]>;
}
// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack, on return the
// address of the variable is in %eax. %ecx is trashed during the function
// call. All other registers are preserved.
let Defs = [EAX, ECX, EFLAGS],
Uses = [ESP],
usesCustomInserter = 1 in
def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLSCall_32",
[(X86TLSCall addr:$sym)]>,
Requires<[In32BitMode]>;
// For x86_64, the address of the thunk is passed in %rdi, on return
// the address of the variable is in %rax. All other registers are preserved.
let Defs = [RAX, EFLAGS],
Uses = [RSP, RDI],
usesCustomInserter = 1 in
def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLSCall_64",
[(X86TLSCall addr:$sym)]>,
Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions
// X86 doesn't have 8-bit conditional moves. Use a customInserter to
// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
// however that requires promoting the operands, and can induce additional
// i8 register pressure.
let usesCustomInserter = 1, Uses = [EFLAGS] in {
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
[(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
imm:$cond, EFLAGS))]>;
let Predicates = [NoCMov] in {
def CMOV_GR32 : I<0, Pseudo,
(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
"#CMOV_GR32* PSEUDO!",
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
def CMOV_GR16 : I<0, Pseudo,
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
"#CMOV_GR16* PSEUDO!",
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
} // Predicates = [NoCMov]
// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
// SSE1.
let Predicates = [FPStackf32] in
def CMOV_RFP32 : I<0, Pseudo,
(outs RFP32:$dst),
(ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
"#CMOV_RFP32 PSEUDO!",
[(set RFP32:$dst,
(X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
EFLAGS))]>;
// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
// SSE2.
let Predicates = [FPStackf64] in
def CMOV_RFP64 : I<0, Pseudo,
(outs RFP64:$dst),
(ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
"#CMOV_RFP64 PSEUDO!",
[(set RFP64:$dst,
(X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP80 : I<0, Pseudo,
(outs RFP80:$dst),
(ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
"#CMOV_RFP80 PSEUDO!",
[(set RFP80:$dst,
(X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
} // usesCustomInserter = 1, Uses = [EFLAGS]
//===----------------------------------------------------------------------===//
// Atomic Instruction Pseudo Instructions
//===----------------------------------------------------------------------===//
// Pseudo atomic instructions
multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
let Defs = [EFLAGS, AL] in
def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
(ins i8mem:$ptr, GR8:$val),
!strconcat(mnemonic, "8 PSEUDO!"), []>;
let Defs = [EFLAGS, AX] in
def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
(ins i16mem:$ptr, GR16:$val),
!strconcat(mnemonic, "16 PSEUDO!"), []>;
let Defs = [EFLAGS, EAX] in
def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
(ins i32mem:$ptr, GR32:$val),
!strconcat(mnemonic, "32 PSEUDO!"), []>;
let Defs = [EFLAGS, RAX] in
def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
(ins i64mem:$ptr, GR64:$val),
!strconcat(mnemonic, "64 PSEUDO!"), []>;
}
}
multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
(!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
(!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
(!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
(!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
}
// Atomic exchange, and, or, xor
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
!strconcat(mnemonic, "6432 PSEUDO!"), []>;
}
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
// FIXME: Use normal instructions and add lock prefix dynamically.
// Memory barriers
// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"or{l}\t{$zero, $dst|$dst, $zero}",
[], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK,
Sched<[WriteALULd, WriteRMW]>;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
[(X86MemBarrier)]>, Sched<[WriteLoad]>;
// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
// ImmOpc8 corresponds to the mi8 version of the instruction
// ImmMod corresponds to the instruction format of the mi and mi8 versions
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
!strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
!strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, OpSize, LOCK;
def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
!strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
!strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
!strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
!strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize, LOCK;
def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
!strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
!strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
!strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize, LOCK;
def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
!strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
!strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
}
}
defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
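// Illustrative sketch: each defm above expands to the mr/mi/mi8 memory forms
// at every width, so LOCK_ADD covers instructions printed as, e.g.,
//   lock addb $1, (%rdi)
//   lock addl %eax, (%rdi)
//   lock addq $8, (%rdi)
// these are isCodeGenOnly and chosen when the value produced by the atomic
// read-modify-write is not otherwise used.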
// Optimized codegen when the non-memory output is not used.
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
!strconcat(mnemonic, "{b}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
!strconcat(mnemonic, "{w}\t$dst"),
[], IIC_UNARY_MEM>, OpSize, LOCK;
def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
!strconcat(mnemonic, "{l}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
!strconcat(mnemonic, "{q}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
}
}
defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
!strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;
}
}
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag,
InstrItinClass itin8, InstrItinClass itin> {
let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
let Defs = [AX, EFLAGS], Uses = [AX] in
def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
!strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
let Defs = [EAX, EFLAGS], Uses = [EAX] in
def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
!strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
let Defs = [RAX, EFLAGS], Uses = [RAX] in
def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
!strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
X86cas8, i64mem,
IIC_CMPX_LOCK_8B>;
}
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem,
IIC_CMPX_LOCK_16B>, REX_W;
}
defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
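// Illustrative sketch: LCMPXCHG32 implements (X86cas addr, swap, 4); the
// expected value travels in EAX and the old memory value comes back there:
//   movl %expected, %eax
//   lock cmpxchgl %new, (%rdi)   # ZF set on success, EAX = old value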
// Atomic exchange and add
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag,
InstrItinClass itin8, InstrItinClass itin> {
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
[(set GR8:$dst,
(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
itin8>;
def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$val, i16mem:$ptr),
!strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
[(set
GR16:$dst,
(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
itin>, OpSize;
def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$val, i32mem:$ptr),
!strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
[(set
GR32:$dst,
(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
itin>;
def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$val, i64mem:$ptr),
!strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
GR64:$dst,
(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
}
}
defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
TB, LOCK;
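// Sketch of the selected code (illustrative only): an atomic fetch-add whose
// result is used becomes
//   movl $1, %eax
//   lock xaddl %eax, (%rdi)       # %eax receives the previous value
// while the result-unused case is handled by the LOCK_* memory-destination
// forms above.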
def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
"#ACQUIRE_MOV PSEUDO!",
[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
"#ACQUIRE_MOV PSEUDO!",
[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
"#ACQUIRE_MOV PSEUDO!",
[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
"#ACQUIRE_MOV PSEUDO!",
[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
"#RELEASE_MOV PSEUDO!",
[(atomic_store_8 addr:$dst, GR8 :$src)]>;
def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
"#RELEASE_MOV PSEUDO!",
[(atomic_store_16 addr:$dst, GR16:$src)]>;
def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
"#RELEASE_MOV PSEUDO!",
[(atomic_store_32 addr:$dst, GR32:$src)]>;
def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
"#RELEASE_MOV PSEUDO!",
[(atomic_store_64 addr:$dst, GR64:$src)]>;
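// Note (an interpretive sketch, not in the original source): on x86's TSO
// memory model a plain MOV already has acquire semantics for loads and
// release semantics for stores, so these pseudos expand to ordinary moves;
// keeping them as distinct opcodes until then prevents earlier machine passes
// from folding or reordering the memory access. Only a seq_cst store needs
// anything stronger (an XCHG or a trailing MFENCE).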
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions.
//===----------------------------------------------------------------------===//
// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
// instruction selection into a branch sequence.
let Uses = [EFLAGS], usesCustomInserter = 1 in {
def CMOV_FR32 : I<0, Pseudo,
(outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
"#CMOV_FR32 PSEUDO!",
[(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
EFLAGS))]>;
def CMOV_FR64 : I<0, Pseudo,
(outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
"#CMOV_FR64 PSEUDO!",
[(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
EFLAGS))]>;
def CMOV_V4F32 : I<0, Pseudo,
(outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
"#CMOV_V4F32 PSEUDO!",
[(set VR128:$dst,
(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V2F64 : I<0, Pseudo,
(outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
"#CMOV_V2F64 PSEUDO!",
[(set VR128:$dst,
(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V2I64 : I<0, Pseudo,
(outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
"#CMOV_V2I64 PSEUDO!",
[(set VR128:$dst,
(v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V8F32 : I<0, Pseudo,
(outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
"#CMOV_V8F32 PSEUDO!",
[(set VR256:$dst,
(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V4F64 : I<0, Pseudo,
(outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
"#CMOV_V4F64 PSEUDO!",
[(set VR256:$dst,
(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V4I64 : I<0, Pseudo,
(outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
"#CMOV_V4I64 PSEUDO!",
[(set VR256:$dst,
(v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V8I64 : I<0, Pseudo,
(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
"#CMOV_V8I64 PSEUDO!",
[(set VR512:$dst,
(v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V8F64 : I<0, Pseudo,
(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
"#CMOV_V8F64 PSEUDO!",
[(set VR512:$dst,
(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
EFLAGS)))]>;
def CMOV_V16F32 : I<0, Pseudo,
(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
"#CMOV_V16F32 PSEUDO!",
[(set VR512:$dst,
(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond,
EFLAGS)))]>;
}
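// Expansion sketch (illustrative, not from the original source): because x86
// has no cmov for FP or vector register classes, the custom inserter expands
// each CMOV_* pseudo into a branch diamond, roughly:
//   jcc .Ltrue                    # branch on $cond
//   <fall through: result is $f>
//   .Ltrue: <result is $t>
//   <join: $dst = phi($t, $f)>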
//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
//===----------------------------------------------------------------------===//
// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable
def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
(ADD32ri GR32:$src1, tconstpool:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
(ADD32ri GR32:$src1, tjumptable:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
(ADD32ri GR32:$src1, texternalsym:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
(ADD32ri GR32:$src1, tblockaddress:$src2)>;
def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV32mi addr:$dst, texternalsym:$src)>;
def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV32mi addr:$dst, tblockaddress:$src)>;
// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable, when not in the
// small code model, should use 'movabs'. FIXME: This is really a hack; the
// 'movabs' predicate should handle this sort of thing.
def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
// In the kernel code model, we can get the address of a label
// into a register with 'movq'. FIXME: This is a hack; the 'imm' predicate of
// MOV64ri32 should accept these.
def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
// In the small code model with static relocation, it is safe to store global
// addresses directly as immediates. FIXME: This is really a hack; the 'imm'
// predicate for MOV64mi32 should handle this sort of thing.
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tconstpool:$src)>,
Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tjumptable:$src)>,
Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tblockaddress:$src)>,
Requires<[NearData, IsStatic]>;
// Calls
// tls has some funny stuff here...
// This corresponds to movabs $foo@tpoff, %rax
def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
(MOV64ri32 tglobaltlsaddr :$dst)>;
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
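// Illustrative context (not from the original source): in the local-exec TLS
// model these patterns appear after a read of the thread pointer, e.g.
//   movq %fs:0, %rax
//   addq $foo@tpoff, %rax         # matched by the ADD64ri32 pattern above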
// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
(CALL64pcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i64 texternalsym:$dst)),
(CALL64pcrel32 texternalsym:$dst)>;
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
// can never use callee-saved registers. That is the purpose of the GR64_TC
// register classes.
//
// The only volatile register that is never used by the calling convention is
// %r11; all the others can be claimed by arguments, as happens when calling
// a vararg function with 6 arguments.
//
// Match an X86tcret that uses fewer than 7 volatile registers.
def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
(X86tcret node:$ptr, node:$off), [{
// X86tcret args: (*chain, ptr, imm, regs..., glue)
unsigned NumRegs = 0;
for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
return false;
return true;
}]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In32BitMode]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
Requires<[In32BitMode, IsNotPIC]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
Requires<[In32BitMode]>;
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
Requires<[In32BitMode]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
Requires<[In64BitMode]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
Requires<[In64BitMode]>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
Requires<[In64BitMode]>;
// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i32 texternalsym:$dst)),
(CALLpcrel32 texternalsym:$dst)>;
def : Pat<(X86call (i32 imm:$dst)),
(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
// Comparisons.
// TEST R,R is smaller than CMP R,0
def : Pat<(X86cmp GR8:$src1, 0),
(TEST8rr GR8:$src1, GR8:$src1)>;
def : Pat<(X86cmp GR16:$src1, 0),
(TEST16rr GR16:$src1, GR16:$src1)>;
def : Pat<(X86cmp GR32:$src1, 0),
(TEST32rr GR32:$src1, GR32:$src1)>;
def : Pat<(X86cmp GR64:$src1, 0),
(TEST64rr GR64:$src1, GR64:$src1)>;
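// Encoding illustration (not in the original source): "testl %eax, %eax"
// encodes as 85 C0 (2 bytes) while "cmpl $0, %eax" is 83 F8 00 (3 bytes);
// both leave identical EFLAGS for a comparison against zero, so the shorter
// TEST form is always preferred.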
// Conditional moves with folded loads, with operands swapped and conditions
// inverted.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
Instruction Inst64> {
let Predicates = [HasCMov] in {
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
(Inst16 GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
(Inst32 GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
(Inst64 GR64:$src2, addr:$src1)>;
}
}
defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
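// Worked example (illustrative): cmov can only fold a load as the operand
// that is moved in when the condition is true. When the load appears on the
// other side of the select, the defm lines above recover the folding by
// swapping the operands and inverting the condition, e.g. selecting
// "cmovael mem, %reg" for an X86cmov on COND_B, which computes the same
// value.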
// zextload bool -> zextload byte
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
// registers, use zero-extending loads so that the entire 64-bit register is
// defined, avoiding partial-register updates.
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
def : Pat<(extloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
(MOVZX32rr8 GR8 :$src), sub_16bit)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
// Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
def : Pat<(i64 (anyext GR8 :$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR32:$src)),
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
// Any instruction that defines a 32-bit result zero-extends into the high
// half of the 64-bit register, with a few exceptions: Truncate can be lowered
// to EXTRACT_SUBREG, CopyFromReg may be copying from a truncate, and x86's
// cmov doesn't write its destination at all if the condition is false. But
// any other 32-bit operation will zero-extend up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
N->getOpcode() != X86ISD::CMOV;
}]>;
// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
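// Illustrative consequence (not from the original source): for IR such as
//   %a = add i32 %x, %y
//   %w = zext i32 %a to i64
// no instruction is emitted for the zext: the 32-bit add already zeroed bits
// 63:32 of the destination, and SUBREG_TO_REG merely relabels the register
// as 64 bits wide.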
//===----------------------------------------------------------------------===//
// Pattern match OR as ADD
//===----------------------------------------------------------------------===//
// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
// 3-addressified into an LEA instruction to avoid copies. However, we also
// want these instructions to finally be emitted as an 'or' at the end of
// code generation to make the generated code easier to read. To do this, we
// select into "disjoint bits" pseudo ops.
// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
APInt KnownZero0, KnownOne0;
CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
APInt KnownZero1, KnownOne1;
CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
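// Worked example (illustrative): given
//   %t = shl i32 %y, 4            ; low 4 bits known zero
//   %r = or i32 %t, 5
// or_is_add matches, so the OR is selected as ADD32ri8_DB. If the two-address
// constraint would otherwise force a copy, it can be three-addressified to
//   leal 5(%reg), %dst
// and otherwise it is emitted back as "orl $5, %reg" late in codegen.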
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
// Try this before selecting to OR.
let AddedComplexity = 5, SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
let isCommutable = 1 in {
def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"", // orw/addw REG, REG
[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
"", // orl/addl REG, REG
[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
"", // orq/addq REG, REG
[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
} // isCommutable
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
def ADD16ri8_DB : I<0, Pseudo,
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"", // orw/addw REG, imm8
[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"", // orw/addw REG, imm
[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
def ADD32ri8_DB : I<0, Pseudo,
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"", // orl/addl REG, imm8
[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"", // orl/addl REG, imm
[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
def ADD64ri8_DB : I<0, Pseudo,
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"", // orq/addq REG, imm8
[(set GR64:$dst, (or_is_add GR64:$src1,
i64immSExt8:$src2))]>;
def ADD64ri32_DB : I<0, Pseudo,
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"", // orq/addq REG, imm
[(set GR64:$dst, (or_is_add GR64:$src1,
i64immSExt32:$src2))]>;
}
} // AddedComplexity, SchedRW
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
def : Pat<(add GR16:$src1, 128),
(SUB16ri8 GR16:$src1, -128)>;
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
(SUB16mi8 addr:$dst, -128)>;
def : Pat<(add GR32:$src1, 128),
(SUB32ri8 GR32:$src1, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
(SUB32mi8 addr:$dst, -128)>;
def : Pat<(add GR64:$src1, 128),
(SUB64ri8 GR64:$src1, -128)>;
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
(SUB64mi8 addr:$dst, -128)>;
// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
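// Encoding illustration (not in the original source): "addl $128, %eax" needs
// the imm32 form (05 80 00 00 00, 5 bytes), while the equivalent
// "subl $-128, %eax" fits the imm8 form (83 E8 80, 3 bytes). The 64-bit
// variant likewise swaps an unencodable +2^31 addend for a sign-extendable
// -2^31 immediate.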
// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
// least 32 bits of leading zeros. If, in addition, the low 32 bits can be
// represented with a sign extension of an 8-bit constant, use that.
def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(SUBREG_TO_REG
(i64 0),
(AND32ri8
(EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo8XForm imm:$imm))),
sub_32bit)>;
def : Pat<(and GR64:$src, i64immZExt32:$imm),
(SUBREG_TO_REG
(i64 0),
(AND32ri
(EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
sub_32bit)>;
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
GR32_ABCD)),
sub_8bit))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
sub_16bit)>,
Requires<[In32BitMode]>;
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(SUBREG_TO_REG (i64 0),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
sub_32bit)>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
sub_32bit)>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
(EXTRACT_SUBREG (MOVZX32rr8 (i8
(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
Requires<[In64BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
sub_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
(i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR64:$src, i32),
(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(EXTRACT_SUBREG (MOVSX32rr8
(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
Requires<[In64BitMode]>;
// sext, sext_load, zext, zext_load
def: Pat<(i16 (sext GR8:$src)),
(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
def: Pat<(sextloadi16i8 addr:$src),
(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
def: Pat<(i16 (zext GR8:$src)),
(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
def: Pat<(zextloadi16i8 addr:$src),
(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
(EXTRACT_SUBREG GR32:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
sub_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (trunc GR64:$src)),
(EXTRACT_SUBREG GR64:$src, sub_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
(EXTRACT_SUBREG GR64:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
(EXTRACT_SUBREG GR64:$src, sub_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
(EXTRACT_SUBREG GR32:$src, sub_8bit)>,
Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
Requires<[In64BitMode]>;
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi)),
sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
sub_8bit_hi))>,
Requires<[In32BitMode]>;
// h-register tricks.
// For now, be conservative on x86-64 and use an h-register extract only if the
// value is immediately zero-extended or stored, which are somewhat common
// cases. This uses a bunch of code to prevent a register requiring a REX prefix
// from being allocated in the same instruction as the h register, as there's
// currently no way to describe this requirement to the register allocator.
// h-register extract and zero-extend.
def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
sub_8bit_hi)),
sub_32bit)>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi)),
sub_16bit)>,
Requires<[In64BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi)),
sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi)),
sub_32bit)>;
// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
sub_8bit_hi))>,
Requires<[In64BitMode]>;
// (shl x, 1) ==> (add x, x)
// Note that if x is undef (immediate or otherwise), we could theoretically
// end up with the two uses of x getting different values, producing a result
// where the least significant bit is not 0. However, the probability of this
// happening is considered low enough that this is officially not a
// "real problem".
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
// Helper imms that check if a mask doesn't change significant shift bits.
def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
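// Rationale sketch (illustrative): the hardware masks a variable shift count
// to 5 bits (6 for 64-bit operands), so for source like "x << (n & 31)" the
// AND is redundant and the patterns below drop it, emitting just
//   shll %cl, %eax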
// (shl x (and y, 31)) ==> (shl x, y)
def : Pat<(shl GR8:$src1, (and CL, immShift32)),
(SHL8rCL GR8:$src1)>;
def : Pat<(shl GR16:$src1, (and CL, immShift32)),
(SHL16rCL GR16:$src1)>;
def : Pat<(shl GR32:$src1, (and CL, immShift32)),
(SHL32rCL GR32:$src1)>;
def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL8mCL addr:$dst)>;
def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL16mCL addr:$dst)>;
def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL32mCL addr:$dst)>;
def : Pat<(srl GR8:$src1, (and CL, immShift32)),
(SHR8rCL GR8:$src1)>;
def : Pat<(srl GR16:$src1, (and CL, immShift32)),
(SHR16rCL GR16:$src1)>;
def : Pat<(srl GR32:$src1, (and CL, immShift32)),
(SHR32rCL GR32:$src1)>;
def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR8mCL addr:$dst)>;
def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR16mCL addr:$dst)>;
def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR32mCL addr:$dst)>;
def : Pat<(sra GR8:$src1, (and CL, immShift32)),
(SAR8rCL GR8:$src1)>;
def : Pat<(sra GR16:$src1, (and CL, immShift32)),
(SAR16rCL GR16:$src1)>;
def : Pat<(sra GR32:$src1, (and CL, immShift32)),
(SAR32rCL GR32:$src1)>;
def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR8mCL addr:$dst)>;
def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR16mCL addr:$dst)>;
def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (shl x (and y, 63)) ==> (shl x, y)
def : Pat<(shl GR64:$src1, (and CL, immShift64)),
(SHL64rCL GR64:$src1)>;
def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
def : Pat<(srl GR64:$src1, (and CL, immShift64)),
(SHR64rCL GR64:$src1)>;
def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
def : Pat<(sra GR64:$src1, (and CL, immShift64)),
(SAR64rCL GR64:$src1)>;
def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
// (anyext (setcc_carry)) -> (setcc_carry)
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
// add reg, reg
def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
// add reg, mem
def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
(ADD8rm GR8:$src1, addr:$src2)>;
def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
(ADD16rm GR16:$src1, addr:$src2)>;
def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
(ADD32rm GR32:$src1, addr:$src2)>;
// add reg, imm
def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(add GR16:$src1, i16immSExt8:$src2),
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(add GR32:$src1, i32immSExt8:$src2),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
// sub reg, reg
def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
// sub reg, mem
def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
(SUB8rm GR8:$src1, addr:$src2)>;
def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
(SUB16rm GR16:$src1, addr:$src2)>;
def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
(SUB32rm GR32:$src1, addr:$src2)>;
// sub reg, imm
def : Pat<(sub GR8:$src1, imm:$src2),
(SUB8ri GR8:$src1, imm:$src2)>;
def : Pat<(sub GR16:$src1, imm:$src2),
(SUB16ri GR16:$src1, imm:$src2)>;
def : Pat<(sub GR32:$src1, imm:$src2),
(SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
// sub 0, reg
def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
def : Pat<(mul GR32:$src1, GR32:$src2),
(IMUL32rr GR32:$src1, GR32:$src2)>;
// mul reg, mem
def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
(IMUL16rm GR16:$src1, addr:$src2)>;
def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
(IMUL32rm GR32:$src1, addr:$src2)>;
// mul reg, imm
def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
def : Pat<(mul GR32:$src1, imm:$src2),
(IMUL32rri GR32:$src1, imm:$src2)>;
def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
// reg = mul mem, imm
def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMUL16rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
(IMUL32rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
// Patterns for nodes that do not produce flags, for instructions that do.
// addition
def : Pat<(add GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
def : Pat<(add GR64:$src1, i64immSExt8:$src2),
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(add GR64:$src1, i64immSExt32:$src2),
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
(ADD64rm GR64:$src1, addr:$src2)>;
// subtraction
def : Pat<(sub GR64:$src1, GR64:$src2),
(SUB64rr GR64:$src1, GR64:$src2)>;
def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
(SUB64rm GR64:$src1, addr:$src2)>;
def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
// Multiply
def : Pat<(mul GR64:$src1, GR64:$src2),
(IMUL64rr GR64:$src1, GR64:$src2)>;
def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
(IMUL64rm GR64:$src1, addr:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
// Increment reg.
def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
// Decrement reg.
def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
// or reg/mem
def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
(OR8rm GR8:$src1, addr:$src2)>;
def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
(OR16rm GR16:$src1, addr:$src2)>;
def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
(OR32rm GR32:$src1, addr:$src2)>;
def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
(OR64rm GR64:$src1, addr:$src2)>;
// or reg/imm
def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
def : Pat<(or GR16:$src1, i16immSExt8:$src2),
(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(or GR32:$src1, i32immSExt8:$src2),
(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(or GR64:$src1, i64immSExt8:$src2),
(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(or GR64:$src1, i64immSExt32:$src2),
(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
// xor reg/reg
def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
// xor reg/mem
def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
(XOR8rm GR8:$src1, addr:$src2)>;
def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
(XOR16rm GR16:$src1, addr:$src2)>;
def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
(XOR32rm GR32:$src1, addr:$src2)>;
def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
(XOR64rm GR64:$src1, addr:$src2)>;
// xor reg/imm
def : Pat<(xor GR8:$src1, imm:$src2),
(XOR8ri GR8:$src1, imm:$src2)>;
def : Pat<(xor GR16:$src1, imm:$src2),
(XOR16ri GR16:$src1, imm:$src2)>;
def : Pat<(xor GR32:$src1, imm:$src2),
(XOR32ri GR32:$src1, imm:$src2)>;
def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
// and reg/reg
def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
// and reg/mem
def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
(AND8rm GR8:$src1, addr:$src2)>;
def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
(AND16rm GR16:$src1, addr:$src2)>;
def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
(AND32rm GR32:$src1, addr:$src2)>;
def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
(AND64rm GR64:$src1, addr:$src2)>;
// and reg/imm
def : Pat<(and GR8:$src1, imm:$src2),
(AND8ri GR8:$src1, imm:$src2)>;
def : Pat<(and GR16:$src1, imm:$src2),
(AND16ri GR16:$src1, imm:$src2)>;
def : Pat<(and GR32:$src1, imm:$src2),
(AND32ri GR32:$src1, imm:$src2)>;
def : Pat<(and GR16:$src1, i16immSExt8:$src2),
(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(and GR32:$src1, i32immSExt8:$src2),
(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
// Bit scan instruction patterns to match explicit zero-undef behavior.
def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
Index: head/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (revision 265924)
+++ head/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (revision 265925)
@@ -1,1046 +1,1048 @@
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation. isConstant indicates whether we're
/// extracting one known element. If false, we're extracting a variable index.
static bool CheapToScalarize(Value *V, bool isConstant) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (isConstant) return true;
// If all elts are the same, we can extract it and use any of the values.
- Constant *Op0 = C->getAggregateElement(0U);
- for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
- if (C->getAggregateElement(i) != Op0)
- return false;
- return true;
+ if (Constant *Op0 = C->getAggregateElement(0U)) {
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
+ ++i)
+ if (C->getAggregateElement(i) != Op0)
+ return false;
+ return true;
+ }
}
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
// An insertelement gets simplified to the inserted element, or is deleted if
// this is a constant-index extractelement and it is a constant-index
// insertelement.
if (I->getOpcode() == Instruction::InsertElement && isConstant &&
isa<ConstantInt>(I->getOperand(2)))
return true;
if (I->getOpcode() == Instruction::Load && I->hasOneUse())
return true;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
if (BO->hasOneUse() &&
(CheapToScalarize(BO->getOperand(0), isConstant) ||
CheapToScalarize(BO->getOperand(1), isConstant)))
return true;
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->hasOneUse() &&
(CheapToScalarize(CI->getOperand(0), isConstant) ||
CheapToScalarize(CI->getOperand(1), isConstant)))
return true;
return false;
}
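// Illustrative example (not part of the original source): for
//   %e = extractelement <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i32 %i
// CheapToScalarize returns true even though %i is a variable index, because
// every element is the same constant and %e can simply become i32 7.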
/// FindScalarElement - Given a vector and an element number, see if the scalar
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
VectorType *VTy = cast<VectorType>(V->getType());
unsigned Width = VTy->getNumElements();
if (EltNo >= Width) // Out of range access.
return UndefValue::get(VTy->getElementType());
if (Constant *C = dyn_cast<Constant>(V))
return C->getAggregateElement(EltNo);
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
return 0;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
// If this is an insert to the element we are looking for, return the
// inserted value.
if (EltNo == IIElt)
return III->getOperand(1);
// Otherwise, the insertelement doesn't modify the value, recurse on its
// vector input.
return FindScalarElement(III->getOperand(0), EltNo);
}
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
int InEl = SVI->getMaskValue(EltNo);
if (InEl < 0)
return UndefValue::get(VTy->getElementType());
if (InEl < (int)LHSWidth)
return FindScalarElement(SVI->getOperand(0), InEl);
return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
// Extract a value from a vector add operation with a constant zero.
Value *Val = 0; Constant *Con = 0;
if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
if (Con->getAggregateElement(EltNo)->isNullValue())
return FindScalarElement(Val, EltNo);
}
// Otherwise, we don't know.
return 0;
}
// If we have a PHI node with a vector type that has only 2 uses (feeding
// itself and serving as an operand of an extractelement at a constant
// location), try to replace the PHI of the vector type with a PHI of a
// scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
if (!PN->hasNUses(2))
return NULL;
// If so, it's known at this point that one operand is PHI and the other is
// an extractelement node. Find the PHI user that is not the extractelement
// node.
Value::use_iterator iu = PN->use_begin();
Instruction *PHIUser = dyn_cast<Instruction>(*iu);
if (PHIUser == cast<Instruction>(&EI))
PHIUser = cast<Instruction>(*(++iu));
// Verify that this PHI user has one use, which is the PHI itself,
// and that it is a binary operation which is cheap to scalarize;
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
!(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
return NULL;
// Create a scalar PHI node that will replace the vector PHI node
// just before the current PHI node.
PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
// Scalarize each PHI operand.
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
Value *PHIInVal = PN->getIncomingValue(i);
BasicBlock *inBB = PN->getIncomingBlock(i);
Value *Elt = EI.getIndexOperand();
// If the operand is the PHI induction variable:
if (PHIInVal == PHIUser) {
// Scalarize the binary operation. Its first operand is the
// scalar PHI and the second operand is extracted from the other
// vector operand.
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
Value *Op = InsertNewInstWith(
ExtractElementInst::Create(B0->getOperand(opId), Elt,
B0->getOperand(opId)->getName() + ".Elt"),
*B0);
Value *newPHIUser = InsertNewInstWith(
BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
scalarPHI->addIncoming(newPHIUser, inBB);
} else {
// Scalarize PHI input:
Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
// Insert the new instruction into the predecessor basic block.
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
BasicBlock::iterator InsertPos;
if (pos && !isa<PHINode>(pos)) {
InsertPos = pos;
++InsertPos;
} else {
InsertPos = inBB->getFirstInsertionPt();
}
InsertNewInstWith(newEI, *InsertPos);
scalarPHI->addIncoming(newEI, inBB);
}
}
return ReplaceInstUsesWith(EI, scalarPHI);
}
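// Illustrative effect (not from the original source): a reduction loop like
//   %v  = phi <4 x float> [ %init, %pre ], [ %v2, %loop ]
//   %v2 = fadd <4 x float> %v, %w
//   ;; sole other use: extractelement <4 x float> %v, i32 0
// is rewritten with a scalar float PHI and a scalar fadd, extracting lane 0
// of %w (and of %init) on the incoming edges instead of keeping a vector PHI.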
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
if (CheapToScalarize(C, false))
return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
// If this is extracting an invalid index, turn this into undef, to avoid
// crashing the code below.
if (IndexVal >= VectorWidth)
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
// property.
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
DemandedMask, UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
}
if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
return ReplaceInstUsesWith(EI, Elt);
// If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
// If there's a vector PHI feeding a scalar use through this extractelement
// instruction, try to scalarize the PHI.
if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
Instruction *scalarPHI = scalarizePHI(EI, PN);
if (scalarPHI)
return scalarPHI;
}
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
// Push extractelement into predecessor operation if legal and
// profitable to do so
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
if (I->hasOneUse() &&
CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
EI.getName()+".lhs");
Value *newEI1 =
Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
EI.getName()+".rhs");
return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
}
} else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
// Extracting the inserted element?
if (IE->getOperand(2) == EI.getOperand(1))
return ReplaceInstUsesWith(EI, IE->getOperand(1));
// If the inserted and extracted elements are constants, they must not
// be the same value; extract from the pre-inserted value instead.
if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
Worklist.AddValue(EI.getOperand(0));
EI.setOperand(0, IE->getOperand(0));
return &EI;
}
} else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Value *Src;
unsigned LHSWidth =
SVI->getOperand(0)->getType()->getVectorNumElements();
if (SrcIdx < 0)
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
if (SrcIdx < (int)LHSWidth)
Src = SVI->getOperand(0);
else {
SrcIdx -= LHSWidth;
Src = SVI->getOperand(1);
}
Type *Int32Ty = Type::getInt32Ty(EI.getContext());
return ExtractElementInst::Create(Src,
ConstantInt::get(Int32Ty,
SrcIdx, false));
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement).
// Bitcasts can change the number of vector elements, and they cost nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
} else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
if (SI->hasOneUse()) {
// TODO: For a select on vectors, it might be useful to do this if it
// has multiple extractelement uses. For vector select, that seems to
// fight the vectorizer.
// If we are extracting an element from a vector select or a select on
// vectors, create a select on the scalars extracted from the vector
// arguments.
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
Value *Cond = SI->getCondition();
if (Cond->getType()->isVectorTy()) {
Cond = Builder->CreateExtractElement(Cond,
EI.getIndexOperand(),
Cond->getName() + ".elt");
}
Value *V1Elem
= Builder->CreateExtractElement(TrueVal,
EI.getIndexOperand(),
TrueVal->getName() + ".elt");
Value *V2Elem
= Builder->CreateExtractElement(FalseVal,
EI.getIndexOperand(),
FalseVal->getName() + ".elt");
return SelectInst::Create(Cond,
V1Elem,
V2Elem,
SI->getName() + ".elt");
}
}
}
return 0;
}
/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
/// elements from either LHS or RHS, return the shuffle mask and true.
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = V->getType()->getVectorNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return true;
}
if (V == LHS) {
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return true;
}
if (V == RHS) {
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
i+NumElts));
return true;
}
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
Value *IdxOp = IEI->getOperand(2);
if (!isa<ConstantInt>(IdxOp))
return false;
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
// Okay, we can handle this if the vector we are inserting into is
// transitively OK.
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted undef.
Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
return true;
}
} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
if (isa<ConstantInt>(EI->getOperand(1)) &&
EI->getOperand(0)->getType() == V->getType()) {
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
// Okay, we can handle this if the vector we are inserting into is
// transitively OK.
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
ExtractedIdx);
} else {
assert(EI->getOperand(0) == RHS);
Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
ExtractedIdx+NumElts);
}
return true;
}
}
}
}
}
// TODO: Handle shufflevector here!
return false;
}
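// Illustrative example (not from the original source): for
//   %s = extractelement <4 x i32> %RHS, i32 2
//   %t = insertelement <4 x i32> %LHS, i32 %s, i32 0
// the collected mask is <i32 6, i32 1, i32 2, i32 3>: element 0 comes from
// RHS lane 2 (encoded as 2 + NumElts) and the remaining lanes from LHS.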
/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
Value *&RHS) {
assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
}
if (isa<ConstantAggregateZero>(V)) {
Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
return V;
}
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
Value *IdxOp = IEI->getOperand(2);
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
EI->getOperand(0)->getType() == V->getType()) {
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
// Either the vector being extracted from or the vector being inserted into
// must be RHS; otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == RHS || RHS == 0) {
RHS = EI->getOperand(0);
Value *V = CollectShuffleElements(VecOp, Mask, RHS);
Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
NumElts+ExtractedIdx);
return V;
}
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
// Update Mask to reflect that `ScalarOp' has been inserted at
// position `InsertedIdx' within the vector returned by IEI.
Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
NumElts+i);
}
return V;
}
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
return EI->getOperand(0);
}
}
}
// TODO: Handle shufflevector here!
// Otherwise, we can't do anything fancy: build an identity mask and return V.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return V;
}
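// Illustrative example (not part of the original source): for the chain
//   %e = extractelement <2 x i32> %b, i32 0
//   %i = insertelement <2 x i32> %a, i32 %e, i32 1
// CollectShuffleElements(%i, Mask, RHS) sets RHS to %b, fills Mask with
// <0, 2>, and returns %a as the LHS, i.e. the equivalent
//   shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>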
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
Value *IdxOp = IE.getOperand(2);
// If we are inserting an undef value, or inserting into an undefined
// place, remove this.
if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
ReplaceInstUsesWith(IE, VecOp);
// If the inserted element was extracted from some other vector, and if the
// indexes are constant, try to turn this into a shufflevector operation.
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
EI->getOperand(0)->getType() == IE.getType()) {
unsigned NumVectorElts = IE.getType()->getNumElements();
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
if (ExtractedIdx >= NumVectorElts) // Out of range extract.
return ReplaceInstUsesWith(IE, VecOp);
if (InsertedIdx >= NumVectorElts) // Out of range insert.
return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
// If we are extracting a value from a vector, then inserting it right
// back into the same place, just use the input vector.
if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
return ReplaceInstUsesWith(IE, VecOp);
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to) into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
SmallVector<Constant*, 16> Mask;
Value *RHS = 0;
Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
if (RHS == 0) RHS = UndefValue::get(LHS->getType());
// We now have a shuffle of LHS, RHS, Mask.
return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
}
}
}
unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
if (V != &IE)
return ReplaceInstUsesWith(IE, V);
return &IE;
}
return 0;
}
/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
unsigned Depth = 5) {
// We can always reorder the elements of a constant.
if (isa<Constant>(V))
return true;
// We won't reorder vector arguments. No IPO here.
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
// Two users may expect different orders of the elements. Don't try it.
if (!I->hasOneUse())
return false;
if (Depth == 0) return false;
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::UIToFP:
case Instruction::SIToFP:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
return false;
}
return true;
}
case Instruction::InsertElement: {
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
if (!CI) return false;
int ElementNumber = CI->getLimitedValue();
// Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
// can't put an element into multiple indices.
bool SeenOnce = false;
for (int i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] == ElementNumber) {
if (SeenOnce)
return false;
SeenOnce = true;
}
}
return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
}
}
return false;
}
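// Illustrative example (not part of the original source): with
// Mask = <0, 0>, input lane 0 must appear in both result lanes, so
//   %v = insertelement <2 x i32> %x, i32 %s, i32 0
// is rejected by the duplicate-index check above: a single insertelement
// cannot write %s into two different lanes of the shuffled result.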
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of a type mismatch, the types of the new operands are taken
/// as correct, and the result type is recomputed from them where needed.
static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
// We don't want to use the IRBuilder here because we want the replacement
// instructions to appear next to 'I', not the builder's insertion point.
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
BinaryOperator *BO = cast<BinaryOperator>(I);
assert(NewOps.size() == 2 && "binary operator with #ops != 2");
BinaryOperator *New =
BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
NewOps[0], NewOps[1], "", BO);
if (isa<OverflowingBinaryOperator>(BO)) {
New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
New->setHasNoSignedWrap(BO->hasNoSignedWrap());
}
if (isa<PossiblyExactOperator>(BO)) {
New->setIsExact(BO->isExact());
}
return New;
}
case Instruction::ICmp:
assert(NewOps.size() == 2 && "icmp with #ops != 2");
return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
NewOps[0], NewOps[1]);
case Instruction::FCmp:
assert(NewOps.size() == 2 && "fcmp with #ops != 2");
return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
NewOps[0], NewOps[1]);
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::UIToFP:
case Instruction::SIToFP:
case Instruction::FPTrunc:
case Instruction::FPExt: {
// It's possible that the mask has a different number of elements from
// the original cast. We recompute the destination type to match the mask.
Type *DestTy =
VectorType::get(I->getType()->getScalarType(),
NewOps[0]->getType()->getVectorNumElements());
assert(NewOps.size() == 1 && "cast with #ops != 1");
return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
"", I);
}
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
}
llvm_unreachable("failed to rebuild vector instructions");
}
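// Illustrative example (hypothetical values, not from the original source):
// if the original instruction is
//   %t = trunc <4 x i32> %v to <4 x i8>
// and NewOps[0] is a <2 x i32> value, BuildNew emits
//   %t.new = trunc <2 x i32> %v.new to <2 x i8>
// because the cast case above recomputes the destination type from the new
// operand's element count.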
Value *
InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// Mask.size() does not need to be equal to the number of vector elements.
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
if (isa<UndefValue>(V)) {
return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
Mask.size()));
}
if (isa<ConstantAggregateZero>(V)) {
return ConstantAggregateZero::get(
VectorType::get(V->getType()->getScalarType(),
Mask.size()));
}
if (Constant *C = dyn_cast<Constant>(V)) {
SmallVector<Constant *, 16> MaskValues;
for (int i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] == -1)
MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
else
MaskValues.push_back(Builder->getInt32(Mask[i]));
}
return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
ConstantVector::get(MaskValues));
}
Instruction *I = cast<Instruction>(V);
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::UIToFP:
case Instruction::SIToFP:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::Select:
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}
if (NeedsRebuild) {
return BuildNew(I, NewOps);
}
return I;
}
case Instruction::InsertElement: {
int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
// The insertelement was inserting at Element. Figure out which element
// that becomes after shuffling. The answer is guaranteed to be unique
// by CanEvaluateShuffled.
bool Found = false;
int Index = 0;
for (int e = Mask.size(); Index != e; ++Index) {
if (Mask[Index] == Element) {
Found = true;
break;
}
}
if (!Found)
return UndefValue::get(
VectorType::get(V->getType()->getScalarType(), Mask.size()));
Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
return InsertElementInst::Create(V, I->getOperand(1),
Builder->getInt32(Index), "", I);
}
}
llvm_unreachable("failed to reorder elements of vector instruction!");
}
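// Illustrative example (not part of the original source): evaluating the
// constant <i32 10, i32 20> with Mask = <1, 0> takes the Constant path
// above and yields
//   shufflevector (<i32 10, i32 20>, undef, <i32 1, i32 0>) == <i32 20, i32 10>
// so a reordered binary operator sees its constant operand pre-shuffled.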
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SmallVector<int, 16> Mask = SVI.getShuffleMask();
bool MadeChange = false;
// Undefined shuffle mask -> undefined value.
if (isa<UndefValue>(SVI.getOperand(2)))
return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
if (V != &SVI)
return ReplaceInstUsesWith(SVI, V);
LHS = SVI.getOperand(0);
RHS = SVI.getOperand(1);
MadeChange = true;
}
unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
// Canonicalize shuffle(x,x,mask) -> shuffle(x,undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x,undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
if (isa<UndefValue>(LHS) && LHS == RHS) {
// shuffle(undef,undef,mask) -> undef.
Value *Result = (VWidth == LHSWidth)
? LHS : UndefValue::get(SVI.getType());
return ReplaceInstUsesWith(SVI, Result);
}
// Remap any references to RHS to use LHS.
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
if (Mask[i] < 0) {
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
continue;
}
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
Mask[i] = -1; // Turn into undef.
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
SVI.setOperand(1, UndefValue::get(RHS->getType()));
SVI.setOperand(2, ConstantVector::get(Elts));
LHS = SVI.getOperand(0);
RHS = SVI.getOperand(1);
MadeChange = true;
}
if (VWidth == LHSWidth) {
// Analyze the shuffle: is the LHS or the RHS an identity shuffle?
bool isLHSID = true, isRHSID = true;
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] < 0) continue; // Ignore undef values.
// Is this an identity shuffle of the LHS value?
isLHSID &= (Mask[i] == (int)i);
// Is this an identity shuffle of the RHS value?
isRHSID &= (Mask[i]-e == i);
}
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
}
if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
return ReplaceInstUsesWith(SVI, V);
}
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
// 1.
// x1=shuffle(v1,v2,mask1)
// x=shuffle(x1,undef,mask)
// ==>
// x=shuffle(v1,undef,newMask)
// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
// 2.
// x1=shuffle(v1,undef,mask1)
// x=shuffle(x1,x2,mask)
// where v1.size() == mask1.size()
// ==>
// x=shuffle(v1,x2,newMask)
// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
// 3.
// x2=shuffle(v2,undef,mask2)
// x=shuffle(x1,x2,mask)
// where v2.size() == mask2.size()
// ==>
// x=shuffle(x1,v2,newMask)
// newMask[i] = (mask[i] < x1.size())
// ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
// 4.
// x1=shuffle(v1,undef,mask1)
// x2=shuffle(v2,undef,mask2)
// x=shuffle(x1,x2,mask)
// where v1.size() == v2.size()
// ==>
// x=shuffle(v1,v2,newMask)
// newMask[i] = (mask[i] < x1.size())
// ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
//
// Here we are really conservative:
// we are absolutely afraid of producing a shuffle mask not in the input
// program, because the code gen may not be smart enough to turn a merged
// shuffle into two specific shuffles: it may produce worse code. As such,
// we only merge two shuffles if the result is either a splat or one of the
// input shuffle masks. In this case, merging the shuffles just removes
// one instruction, which we know is safe. This is good for things like
// turning: (splat(splat)) -> splat, or
// merge(V[0..n], V[n+1..2n]) -> V[0..2n]
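// Illustrative instance of case 1 (not from the original source):
//   x1 = shuffle(v1, v2, <0,5,2,7>)
//   x = shuffle(x1, undef, <0,0,0,0>)
// gives newMask[i] = mask1[mask[i]] = <0,0,0,0>, a splat of v1[0], so the
// pair is merged into x = shuffle(v1, v2, <0,0,0,0>).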
ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
if (LHSShuffle)
if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
LHSShuffle = NULL;
if (RHSShuffle)
if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
RHSShuffle = NULL;
if (!LHSShuffle && !RHSShuffle)
return MadeChange ? &SVI : 0;
Value* LHSOp0 = NULL;
Value* LHSOp1 = NULL;
Value* RHSOp0 = NULL;
unsigned LHSOp0Width = 0;
unsigned RHSOp0Width = 0;
if (LHSShuffle) {
LHSOp0 = LHSShuffle->getOperand(0);
LHSOp1 = LHSShuffle->getOperand(1);
LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
}
if (RHSShuffle) {
RHSOp0 = RHSShuffle->getOperand(0);
RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
}
Value* newLHS = LHS;
Value* newRHS = RHS;
if (LHSShuffle) {
// case 1
if (isa<UndefValue>(RHS)) {
newLHS = LHSOp0;
newRHS = LHSOp1;
}
// case 2 or 4
else if (LHSOp0Width == LHSWidth) {
newLHS = LHSOp0;
}
}
// case 3 or 4
if (RHSShuffle && RHSOp0Width == LHSWidth) {
newRHS = RHSOp0;
}
// case 4
if (LHSOp0 == RHSOp0) {
newLHS = LHSOp0;
newRHS = NULL;
}
if (newLHS == LHS && newRHS == RHS)
return MadeChange ? &SVI : 0;
SmallVector<int, 16> LHSMask;
SmallVector<int, 16> RHSMask;
if (newLHS != LHS)
LHSMask = LHSShuffle->getShuffleMask();
if (RHSShuffle && newRHS != RHS)
RHSMask = RHSShuffle->getShuffleMask();
unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
SmallVector<int, 16> newMask;
bool isSplat = true;
int SplatElt = -1;
// Create a new mask for the new ShuffleVectorInst so that the new
// ShuffleVectorInst is equivalent to the original one.
for (unsigned i = 0; i < VWidth; ++i) {
int eltMask;
if (Mask[i] < 0) {
// This element is an undef value.
eltMask = -1;
} else if (Mask[i] < (int)LHSWidth) {
// This element is from the left hand side vector operand.
//
// If LHS is going to be replaced (case 1, 2, or 4), calculate the
// new mask value for the element.
if (newLHS != LHS) {
eltMask = LHSMask[Mask[i]];
// If the value selected is an undef value, explicitly specify it
// with a -1 mask value.
if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
eltMask = -1;
} else
eltMask = Mask[i];
} else {
// This element is from the right hand side vector operand
//
// If the value selected is an undef value, explicitly specify it
// with a -1 mask value. (case 1)
if (isa<UndefValue>(RHS))
eltMask = -1;
// If RHS is going to be replaced (case 3 or 4), calculate the
// new mask value for the element.
else if (newRHS != RHS) {
eltMask = RHSMask[Mask[i]-LHSWidth];
// If the value selected is an undef value, explicitly specify it
// with a -1 mask value.
if (eltMask >= (int)RHSOp0Width) {
assert(isa<UndefValue>(RHSShuffle->getOperand(1))
&& "should have been check above");
eltMask = -1;
}
} else
eltMask = Mask[i]-LHSWidth;
// If LHS's width is changed, shift the mask value accordingly.
// If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
// If newRHS == newLHS, we want to remap any references from newRHS to
// newLHS so that we can properly identify splats that may occur due to
// obfuscation across the two vectors.
if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
eltMask += newLHSWidth;
}
// Check if this could still be a splat.
if (eltMask >= 0) {
if (SplatElt >= 0 && SplatElt != eltMask)
isSplat = false;
SplatElt = eltMask;
}
newMask.push_back(eltMask);
}
// If the result mask is equal to one of the original shuffle masks,
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
SmallVector<Constant*, 16> Elts;
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
if (newMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));
} else {
Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
}
}
if (newRHS == NULL)
newRHS = UndefValue::get(newLHS->getType());
return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
}
return MadeChange ? &SVI : 0;
}
Index: head/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp (revision 265924)
+++ head/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp (revision 265925)
@@ -1,1184 +1,1186 @@
//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements a simple loop reroller.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
STATISTIC(NumRerolledLoops, "Number of rerolled loops");
static cl::opt<unsigned>
MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
cl::desc("The maximum increment for loop rerolling"));
// This loop re-rolling transformation aims to transform loops like this:
//
// int foo(int a);
// void bar(int *x) {
// for (int i = 0; i < 500; i += 3) {
// foo(i);
// foo(i+1);
// foo(i+2);
// }
// }
//
// into a loop like this:
//
// void bar(int *x) {
// for (int i = 0; i < 500; ++i)
// foo(i);
// }
//
// It does this by looking for loops that, besides the latch code, are composed
// of isomorphic DAGs of instructions, with each DAG rooted at some increment
// to the induction variable, and where each DAG is isomorphic to the DAG
// rooted at the induction variable (excepting the sub-DAGs which root the
// other induction-variable increments). In other words, we're looking for loop
// bodies of the form:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1 <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2 <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// where each f(i) is a set of instructions that, collectively, are a function
// only of i (and other loop-invariant values).
//
// As a special case, we can also reroll loops like this:
//
// int foo(int);
// void bar(int *x) {
// for (int i = 0; i < 500; ++i) {
// x[3*i] = foo(0);
// x[3*i+1] = foo(0);
// x[3*i+2] = foo(0);
// }
// }
//
// into this:
//
// void bar(int *x) {
// for (int i = 0; i < 1500; ++i)
// x[i] = foo(0);
// }
//
// in which case, we're looking for inputs like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// %scaled.iv = mul %iv, scale
// f(%scaled.iv)
// %scaled.iv.1 = add %scaled.iv, 1
// f(%scaled.iv.1)
// %scaled.iv.2 = add %scaled.iv, 2
// f(%scaled.iv.2)
// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
// f(%scaled.iv.scale_m_1)
// ...
// %iv.next = add %iv, 1
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
namespace {
class LoopReroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
LoopReroll() : LoopPass(ID) {
initializeLoopRerollPass(*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
protected:
AliasAnalysis *AA;
LoopInfo *LI;
ScalarEvolution *SE;
DataLayout *DL;
TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef SmallVector<Instruction *, 16> SmallInstructionVector;
typedef SmallSet<Instruction *, 16> SmallInstructionSet;
// A chain of isomorphic instructions, identified by a single-use PHI,
// representing a reduction. Only the last value may be used outside the
// loop.
struct SimpleLoopReduction {
SimpleLoopReduction(Instruction *P, Loop *L)
: Valid(false), Instructions(1, P) {
assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
add(L);
}
bool valid() const {
return Valid;
}
Instruction *getPHI() const {
assert(Valid && "Using invalid reduction");
return Instructions.front();
}
Instruction *getReducedValue() const {
assert(Valid && "Using invalid reduction");
return Instructions.back();
}
Instruction *get(size_t i) const {
assert(Valid && "Using invalid reduction");
return Instructions[i+1];
}
Instruction *operator [] (size_t i) const { return get(i); }
// The size, ignoring the initial PHI.
size_t size() const {
assert(Valid && "Using invalid reduction");
return Instructions.size()-1;
}
typedef SmallInstructionVector::iterator iterator;
typedef SmallInstructionVector::const_iterator const_iterator;
iterator begin() {
assert(Valid && "Using invalid reduction");
return llvm::next(Instructions.begin());
}
const_iterator begin() const {
assert(Valid && "Using invalid reduction");
return llvm::next(Instructions.begin());
}
iterator end() { return Instructions.end(); }
const_iterator end() const { return Instructions.end(); }
protected:
bool Valid;
SmallInstructionVector Instructions;
void add(Loop *L);
};
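// Illustrative example (not from the original source): for the chain
//   %x.phi = phi float [ 0.0, %preheader ], [ %x.3, %body ]
//   %x.1 = fadd float %x.phi, %a
//   %x.2 = fadd float %x.1, %b
//   %x.3 = fadd float %x.2, %c
// Instructions holds {%x.phi, %x.1, %x.2, %x.3}, size() is 3, and
// getReducedValue() is %x.3, the only value usable outside the loop.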
// The set of all reductions, and state tracking of possible reductions
// during loop instruction processing.
struct ReductionTracker {
typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
// Add a new possible reduction.
void addSLR(SimpleLoopReduction &SLR) {
PossibleReds.push_back(SLR);
}
// Set up tracking of possible reductions corresponding to the provided
// rerolling scale. Only reductions with a number of non-PHI instructions
// that is divisible by the scale are considered. Three instruction sets
// are filled in:
// - A set of all possible instructions in eligible reductions.
// - A set of all PHIs in eligible reductions
// - A set of all reduced values (last instructions) in eligible reductions.
void restrictToScale(uint64_t Scale,
SmallInstructionSet &PossibleRedSet,
SmallInstructionSet &PossibleRedPHISet,
SmallInstructionSet &PossibleRedLastSet) {
PossibleRedIdx.clear();
PossibleRedIter.clear();
Reds.clear();
for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
if (PossibleReds[i].size() % Scale == 0) {
PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
PossibleRedPHISet.insert(PossibleReds[i].getPHI());
PossibleRedSet.insert(PossibleReds[i].getPHI());
PossibleRedIdx[PossibleReds[i].getPHI()] = i;
for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
JE = PossibleReds[i].end(); J != JE; ++J) {
PossibleRedSet.insert(*J);
PossibleRedIdx[*J] = i;
}
}
}
// The functions below are used while processing the loop instructions.
// Are the two instructions both from reductions, and furthermore, from
// the same reduction?
bool isPairInSame(Instruction *J1, Instruction *J2) {
DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
if (J1I != PossibleRedIdx.end()) {
DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
return true;
}
return false;
}
// The two provided instructions, the first from the base iteration, and
// the second from iteration i, form a matched pair. If these are part of
// a reduction, record that fact.
void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
if (PossibleRedIdx.count(J1)) {
assert(PossibleRedIdx.count(J2) &&
"Recording reduction vs. non-reduction instruction?");
PossibleRedIter[J1] = 0;
PossibleRedIter[J2] = i;
int Idx = PossibleRedIdx[J1];
assert(Idx == PossibleRedIdx[J2] &&
"Recording pair from different reductions?");
Reds.insert(Idx);
}
}
// The functions below can be called after we've finished processing all
// instructions in the loop, and we know which reductions were selected.
// Is the provided instruction the PHI of a reduction selected for
// rerolling?
bool isSelectedPHI(Instruction *J) {
if (!isa<PHINode>(J))
return false;
for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
RI != RIE; ++RI) {
int i = *RI;
if (cast<Instruction>(J) == PossibleReds[i].getPHI())
return true;
}
return false;
}
bool validateSelected();
void replaceSelected();
protected:
// The vector of all possible reductions (for any scale).
SmallReductionVector PossibleReds;
DenseMap<Instruction *, int> PossibleRedIdx;
DenseMap<Instruction *, int> PossibleRedIter;
DenseSet<int> Reds;
};
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
void collectPossibleReductions(Loop *L,
ReductionTracker &Reductions);
void collectInLoopUserSet(Loop *L,
const SmallInstructionVector &Roots,
const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users);
void collectInLoopUserSet(Loop *L,
Instruction * Root,
const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users);
bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
Instruction *&IV,
SmallInstructionVector &LoopIncs);
bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
SmallVector<SmallInstructionVector, 32> &Roots,
SmallInstructionSet &AllRoots,
SmallInstructionVector &LoopIncs);
bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
ReductionTracker &Reductions);
};
}
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
Pass *llvm::createLoopRerollPass() {
return new LoopReroll;
}
// Returns true if the provided instruction is used outside the given loop.
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
// non-loop blocks to be outside the loop.
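// For instance (illustration, not from the original source), an LCSSA-style
// PHI in the exit block:
//   exit:
//     %lcssa = phi i32 [ %x, %body ]
// counts as a use of %x outside the loop, even though the incoming value
// flows in from a loop block.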
static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
for (Value::use_iterator UI = I->use_begin(),
UIE = I->use_end(); UI != UIE; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (!L->contains(User))
return true;
}
return false;
}
// Collect the list of loop induction variables with respect to which it might
// be possible to reroll the loop.
void LoopReroll::collectPossibleIVs(Loop *L,
SmallInstructionVector &PossibleIVs) {
BasicBlock *Header = L->getHeader();
for (BasicBlock::iterator I = Header->begin(),
IE = Header->getFirstInsertionPt(); I != IE; ++I) {
if (!isa<PHINode>(I))
continue;
if (!I->getType()->isIntegerTy())
continue;
if (const SCEVAddRecExpr *PHISCEV =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
if (PHISCEV->getLoop() != L)
continue;
if (!PHISCEV->isAffine())
continue;
if (const SCEVConstant *IncSCEV =
dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
continue;
if (IncSCEV->getValue()->uge(MaxInc))
continue;
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
*PHISCEV << "\n");
PossibleIVs.push_back(I);
}
}
}
}
// Add the remainder of the reduction-variable chain to the instruction vector
// (the initial PHINode has already been added). If successful, the object is
// marked as valid.
void LoopReroll::SimpleLoopReduction::add(Loop *L) {
assert(!Valid && "Cannot add to an already-valid chain");
// The reduction variable must be a chain of single-use instructions
// (including the PHI), except for the last value (which is used by the PHI
// and also outside the loop).
Instruction *C = Instructions.front();
do {
C = cast<Instruction>(*C->use_begin());
if (C->hasOneUse()) {
if (!C->isBinaryOp())
return;
if (!(isa<PHINode>(Instructions.back()) ||
C->isSameOperationAs(Instructions.back())))
return;
Instructions.push_back(C);
}
} while (C->hasOneUse());
if (Instructions.size() < 2 ||
!C->isSameOperationAs(Instructions.back()) ||
C->use_begin() == C->use_end())
return;
// C is now the (potential) last instruction in the reduction chain.
for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
UI != UIE; ++UI) {
// The only in-loop user can be the initial PHI.
if (L->contains(cast<Instruction>(*UI)))
if (cast<Instruction>(*UI) != Instructions.front())
return;
}
Instructions.push_back(C);
Valid = true;
}
// Collect the vector of possible reduction variables.
void LoopReroll::collectPossibleReductions(Loop *L,
ReductionTracker &Reductions) {
BasicBlock *Header = L->getHeader();
for (BasicBlock::iterator I = Header->begin(),
IE = Header->getFirstInsertionPt(); I != IE; ++I) {
if (!isa<PHINode>(I))
continue;
if (!I->getType()->isSingleValueType())
continue;
SimpleLoopReduction SLR(I, L);
if (!SLR.valid())
continue;
DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
SLR.size() << " chained instructions)\n");
Reductions.addSLR(SLR);
}
}
// Collect the set of all users of the provided root instruction. This set of
// users contains not only the direct users of the root instruction, but also
// all users of those users, and so on. There are two exceptions:
//
// 1. Instructions in the set of excluded instructions are never added to the
// use set (even if they are users). This is used, for example, to avoid
// including root increments in the use set of the primary IV.
//
// 2. Instructions in the set of final instructions are added to the use set
// if they are users, but their users are not added. This is used, for
// example, to prevent a reduction update from forcing all later reduction
// updates into the use set.
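// As an illustration (not from the original source): in the scale-3 loop
// from the file header, calling this with Root = %iv and Exclude =
// {%iv.1, %iv.2, %iv.next} collects f(%iv) only; the root increments are
// users of %iv, but excluding them keeps f(%iv.1) and f(%iv.2) out of the
// use set.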
void LoopReroll::collectInLoopUserSet(Loop *L,
Instruction *Root, const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
SmallInstructionVector Queue(1, Root);
while (!Queue.empty()) {
Instruction *I = Queue.pop_back_val();
if (!Users.insert(I).second)
continue;
if (!Final.count(I))
for (Value::use_iterator UI = I->use_begin(),
UIE = I->use_end(); UI != UIE; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (PHINode *PN = dyn_cast<PHINode>(User)) {
// Ignore "wrap-around" uses to PHIs of this loop's header.
if (PN->getIncomingBlock(UI) == L->getHeader())
continue;
}
if (L->contains(User) && !Exclude.count(User)) {
Queue.push_back(User);
}
}
// We also want to collect single-user "feeder" values.
for (User::op_iterator OI = I->op_begin(),
OIE = I->op_end(); OI != OIE; ++OI) {
if (Instruction *Op = dyn_cast<Instruction>(*OI))
if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
!Final.count(Op))
Queue.push_back(Op);
}
}
}
// Collect all of the users of all of the provided root instructions (combined
// into a single set).
void LoopReroll::collectInLoopUserSet(Loop *L,
const SmallInstructionVector &Roots,
const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
for (SmallInstructionVector::const_iterator I = Roots.begin(),
IE = Roots.end(); I != IE; ++I)
collectInLoopUserSet(L, *I, Exclude, Final, Users);
}
static bool isSimpleLoadStore(Instruction *I) {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->isSimple();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isSimple();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return false;
}
// Recognize loops that are setup like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// %scaled.iv = mul %iv, scale
// f(%scaled.iv)
// %scaled.iv.1 = add %scaled.iv, 1
// f(%scaled.iv.1)
// %scaled.iv.2 = add %scaled.iv, 2
// f(%scaled.iv.2)
// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
// f(%scaled.iv.scale_m_1)
// ...
// %iv.next = add %iv, 1
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
Instruction *&IV,
SmallInstructionVector &LoopIncs) {
// This is a special case: here we're looking for all uses (except for
// the increment) to be multiplied by a common factor. The increment must
// be by one. This is to capture loops like:
// for (int i = 0; i < 500; ++i) {
// foo(3*i); foo(3*i+1); foo(3*i+2);
// }
if (RealIV->getNumUses() != 2)
return false;
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
*User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
return false;
const SCEVAddRecExpr *User1SCEV =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
*User2SCEV =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
if (!User1SCEV || !User1SCEV->isAffine() ||
!User2SCEV || !User2SCEV->isAffine())
return false;
// We assume below that User1 is the scale multiply and User2 is the
// increment. If it is the other way around, swap them.
if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
std::swap(User1, User2);
std::swap(User1SCEV, User2SCEV);
}
if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
return false;
assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
"Invalid non-unit step for multiplicative scaling");
LoopIncs.push_back(User2);
if (const SCEVConstant *MulScale =
dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
// Make sure that both the start and step have the same multiplier.
if (RealIVSCEV->getStart()->getType() != MulScale->getType())
return false;
if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
User1SCEV->getStart())
return false;
ConstantInt *MulScaleCI = MulScale->getValue();
if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
return false;
Scale = MulScaleCI->getZExtValue();
IV = User1;
} else
return false;
DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
return true;
}
// Collect all root increments with respect to the provided induction variable
// (normally the PHI, but sometimes a multiply). A root increment is an
// instruction, normally an add, with a positive constant less than Scale. In a
// rerollable loop, each of these increments is the root of an instruction
// graph isomorphic to the others. Also, we collect the final induction
// increment (the increment equal to the Scale), and its users in LoopIncs.
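// For the scale-3 pattern in the file header (illustration, not from the
// original source): %iv.1 = add %iv, 1 lands in Roots[0], %iv.2 = add %iv, 2
// lands in Roots[1], and %iv.next = add %iv, 3 (the increment equal to the
// scale) is recorded in LoopIncs.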
bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
Instruction *IV,
SmallVector<SmallInstructionVector, 32> &Roots,
SmallInstructionSet &AllRoots,
SmallInstructionVector &LoopIncs) {
for (Value::use_iterator UI = IV->use_begin(),
UIE = IV->use_end(); UI != UIE; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (!SE->isSCEVable(User->getType()))
continue;
if (User->getType() != IV->getType())
continue;
if (!L->contains(User))
continue;
if (hasUsesOutsideLoop(User, L))
continue;
if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
SE->getSCEV(User), SE->getSCEV(IV)))) {
uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
if (Idx > 0 && Idx < Scale) {
Roots[Idx-1].push_back(User);
AllRoots.insert(User);
} else if (Idx == Scale && Inc > 1) {
LoopIncs.push_back(User);
}
}
}
if (Roots[0].empty())
return false;
bool AllSame = true;
for (unsigned i = 1; i < Scale-1; ++i)
if (Roots[i].size() != Roots[0].size()) {
AllSame = false;
break;
}
if (!AllSame)
return false;
return true;
}
// Validate the selected reductions. All iterations must have an isomorphic
// part of the reduction chain and, for non-associative reductions, the chain
// entries must appear in order.
bool LoopReroll::ReductionTracker::validateSelected() {
// For a non-associative reduction, the chain entries must appear in order.
for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
RI != RIE; ++RI) {
int i = *RI;
int PrevIter = 0, BaseCount = 0, Count = 0;
for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
JE = PossibleReds[i].end(); J != JE; ++J) {
// Note that all instructions in the chain must have been found because
// all instructions in the loop must have been assigned to some
// iteration.
int Iter = PossibleRedIter[*J];
if (Iter != PrevIter && Iter != PrevIter + 1 &&
!PossibleReds[i].getReducedValue()->isAssociative()) {
DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
*J << "\n");
return false;
}
if (Iter != PrevIter) {
if (Count != BaseCount) {
DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
" reduction use count " << Count <<
" is not equal to the base use count " <<
BaseCount << "\n");
return false;
}
Count = 0;
}
++Count;
if (Iter == 0)
++BaseCount;
PrevIter = Iter;
}
}
return true;
}
// For all selected reductions, remove all parts except those in the first
// iteration (and the PHI). Replace outside uses of the reduced value with uses
// of the first-iteration reduced value (in other words, reroll the selected
// reductions).
void LoopReroll::ReductionTracker::replaceSelected() {
// Fixup reductions to refer to the last instruction associated with the
// first iteration (not the last).
for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
RI != RIE; ++RI) {
int i = *RI;
int j = 0;
for (int e = PossibleReds[i].size(); j != e; ++j)
if (PossibleRedIter[PossibleReds[i][j]] != 0) {
--j;
break;
}
// Replace users with the new end-of-chain value.
SmallInstructionVector Users;
for (Value::use_iterator UI =
PossibleReds[i].getReducedValue()->use_begin(),
UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
Users.push_back(cast<Instruction>(*UI));
for (SmallInstructionVector::iterator J = Users.begin(),
JE = Users.end(); J != JE; ++J)
(*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
PossibleReds[i][j]);
}
}
// Reroll the provided loop with respect to the provided induction variable.
// Generally, we're looking for a loop like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1 <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2 <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
// be intermixed with each other. The restriction imposed by this algorithm is
// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
// etc. be the same.
//
// First, we collect the use set of %iv, excluding the other increment roots.
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
// times, having collected the use set of f(%iv.(i+1)), during which we:
// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
// the next unmatched instruction in f(%iv.(i+1)).
// - Ensure that both matched instructions don't have any external users
// (with the exception of last-in-chain reduction instructions).
// - Track the (aliasing) write set, and other side effects, of all
// instructions that belong to future iterations that come before the matched
// instructions. If the matched instructions read from that write set, then
// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
// if any of these future instructions had side effects (could not be
// speculatively executed), and so do the matched instructions, then we
// cannot reorder those side-effect-producing instructions, and rerolling
// fails.
//
// Finally, we make sure that all loop instructions are either loop increment
// roots, belong to simple latch code, parts of validated reductions, part of
// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
// have been validated), then we reroll the loop.
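// Illustrative sketch of the matching walk (not from the original source),
// for scale 3: BaseUseSet = f(%iv) is collected first; then the use sets of
// Roots[0] = {%iv.1} and Roots[1] = {%iv.2} are each matched
// instruction-by-instruction against f(%iv). Only if every instruction
// pairs up (and the selected reductions validate) are the non-base
// iterations deleted and the induction variable replaced by one that steps
// by 1.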
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
getValue()->getZExtValue();
// The collection of loop increment instructions.
SmallInstructionVector LoopIncs;
uint64_t Scale = Inc;
// The effective induction variable, IV, is normally also the real induction
// variable. When we're dealing with a loop like:
// for (int i = 0; i < 500; ++i)
// x[3*i] = ...;
// x[3*i+1] = ...;
// x[3*i+2] = ...;
// then the real IV is still i, but the effective IV is (3*i).
Instruction *RealIV = IV;
if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
return false;
assert(Scale <= MaxInc && "Scale is too large");
assert(Scale > 1 && "Scale must be at least 2");
// The set of increment instructions for each increment value.
SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
SmallInstructionSet AllRoots;
if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
return false;
DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
*RealIV << "\n");
// An array of just the possible reductions for this scale factor. When we
// collect the set of all users of some root instructions, these reduction
// instructions are treated as 'final' (their uses are not considered).
// This is important because we don't want the root use set to search down
// the reduction chain.
SmallInstructionSet PossibleRedSet;
SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
PossibleRedLastSet);
// We now need to check for equivalence of the use graph of each root with
// that of the primary induction variable (excluding the roots). Our goal
// here is not to solve the full graph isomorphism problem, but rather to
// catch common cases without a lot of work. As a result, we will assume
// that the relative order of the instructions in each unrolled iteration
// is the same (although we will not make an assumption about how the
// different iterations are intermixed). Note that while the order must be
// the same, the instructions may not be in the same basic block.
SmallInstructionSet Exclude(AllRoots);
Exclude.insert(LoopIncs.begin(), LoopIncs.end());
DenseSet<Instruction *> BaseUseSet;
collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
DenseSet<Instruction *> AllRootUses;
std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
bool MatchFailed = false;
for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
PossibleRedSet, RootUseSet);
DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
" vs. iteration increment " << (i+1) <<
" use set size: " << RootUseSet.size() << "\n");
if (BaseUseSet.size() != RootUseSet.size()) {
MatchFailed = true;
break;
}
// In addition to regular aliasing information, we need to look for
// instructions from later (future) iterations that have side effects
// preventing us from reordering them past other instructions with side
// effects.
bool FutureSideEffects = false;
AliasSetTracker AST(*AA);
// The map between instructions in f(%iv.(i+1)) and f(%iv).
DenseMap<Value *, Value *> BaseMap;
assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
if (cast<Instruction>(J1) == RealIV)
continue;
if (cast<Instruction>(J1) == IV)
continue;
if (!BaseUseSet.count(J1))
continue;
if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
continue;
while (J2 != JE && (!RootUseSet.count(J2) ||
std::find(Roots[i].begin(), Roots[i].end(), J2) !=
Roots[i].end())) {
// As we iterate through the instructions, instructions that don't
// belong to previous iterations (or the base case) must belong to
// future iterations. We want to track the alias set of writes from
// previous iterations.
if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
!AllRootUses.count(J2)) {
if (J2->mayWriteToMemory())
AST.add(J2);
// Note: This is specifically guarded by a check on isa<PHINode>,
// which, while a valid (somewhat arbitrary) micro-optimization, is
// needed because otherwise isSafeToSpeculativelyExecute returns
// false on PHI nodes.
if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
FutureSideEffects = true;
}
++J2;
}
if (!J1->isSameOperationAs(J2)) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 << "\n");
MatchFailed = true;
break;
}
// Make sure that this instruction, which is in the use set of this
// root instruction, does not also belong to the base set or the set of
// some previous root instruction.
if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 << " (prev. case overlap)\n");
MatchFailed = true;
break;
}
// Make sure that we don't alias with any instruction in the alias set
// tracker. If we do, then we depend on a future iteration, and we
// can't reroll.
if (J2->mayReadFromMemory()) {
for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
K != KE && !MatchFailed; ++K) {
if (K->aliasesUnknownInst(J2, *AA)) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 << " (depends on future store)\n");
MatchFailed = true;
break;
}
}
}
// If we've passed an instruction from a future iteration that may have
// side effects, and this instruction might also, then we can't reorder
// them, and this matching fails. As an exception, we allow the alias
// set tracker to handle regular (simple) load/store dependencies.
if (FutureSideEffects &&
((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
(!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 <<
" (side effects prevent reordering)\n");
MatchFailed = true;
break;
}
// For instructions that are part of a reduction, if the operation is
// associative, then don't bother matching the operands (because we
// already know that the instructions are isomorphic, and the order
// within the iteration does not matter). For non-associative reductions,
// we do need to match the operands, because we need to reject
// out-of-order instructions within an iteration!
// For example (assume floating-point addition), we need to reject this:
// x += a[i]; x += b[i];
// x += a[i+1]; x += b[i+1];
// x += b[i+2]; x += a[i+2];
bool InReduction = Reductions.isPairInSame(J1, J2);
if (!(InReduction && J1->isAssociative())) {
bool Swapped = false, SomeOpMatched = false;
for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
Value *Op2 = J2->getOperand(j);
// If this is part of a reduction (and the operation is not
// associative), then we match all operands, but not those that are
// part of the reduction.
if (InReduction)
if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
if (Reductions.isPairInSame(J2, Op2I))
continue;
DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
if (BMI != BaseMap.end())
Op2 = BMI->second;
else if (std::find(Roots[i].begin(), Roots[i].end(),
(Instruction*) Op2) != Roots[i].end())
Op2 = IV;
if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
// If we've not already decided to swap the matched operands, and
// we've not already matched our first operand (note that we could
// have skipped matching the first operand because it is part of a
// reduction above), and the instruction is commutative, then try
// the swapped match.
if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
J1->getOperand(!j) == Op2) {
Swapped = true;
} else {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 << " (operand " << j << ")\n");
MatchFailed = true;
break;
}
}
SomeOpMatched = true;
}
}
if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
(!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 << " (uses outside loop)\n");
MatchFailed = true;
break;
}
if (!MatchFailed)
BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
AllRootUses.insert(J2);
Reductions.recordPair(J1, J2, i+1);
++J2;
}
}
if (MatchFailed)
return false;
DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
*RealIV << "\n");
DenseSet<Instruction *> LoopIncUseSet;
collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
SmallInstructionSet(), LoopIncUseSet);
DEBUG(dbgs() << "LRR: Loop increment set size: " <<
LoopIncUseSet.size() << "\n");
// Make sure that all instructions in the loop have been included in some
// use set.
for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
J != JE; ++J) {
if (isa<DbgInfoIntrinsic>(J))
continue;
if (cast<Instruction>(J) == RealIV)
continue;
if (cast<Instruction>(J) == IV)
continue;
if (BaseUseSet.count(J) || AllRootUses.count(J) ||
(LoopIncUseSet.count(J) && (J->isTerminator() ||
isSafeToSpeculativelyExecute(J, DL))))
continue;
if (AllRoots.count(J))
continue;
if (Reductions.isSelectedPHI(J))
continue;
DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
" unprocessed instruction found: " << *J << "\n");
MatchFailed = true;
break;
}
if (MatchFailed)
return false;
DEBUG(dbgs() << "LRR: all instructions processed from " <<
*RealIV << "\n");
if (!Reductions.validateSelected())
return false;
// At this point, we've validated the rerolling, and we're committed to
// making changes!
Reductions.replaceSelected();
// Remove instructions associated with non-base iterations.
for (BasicBlock::reverse_iterator J = Header->rbegin();
J != Header->rend();) {
if (AllRootUses.count(&*J)) {
Instruction *D = &*J;
DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
D->eraseFromParent();
continue;
}
++J;
}
// Insert the new induction variable.
const SCEV *Start = RealIVSCEV->getStart();
if (Inc == 1)
Start = SE->getMulExpr(Start,
SE->getConstant(Start->getType(), Scale));
const SCEVAddRecExpr *H =
cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
SE->getConstant(RealIVSCEV->getType(), 1),
L, SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, "reroll");
- PHINode *NewIV =
- cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
- Header->begin()));
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
JE = BaseUseSet.end(); J != JE; ++J)
(*J)->replaceUsesOfWith(IV, NewIV);
if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
if (LoopIncUseSet.count(BI)) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
if (Inc == 1)
ICSCEV =
SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
- Value *IC;
- if (isa<SCEVConstant>(ICSCEV)) {
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ // Iteration count SCEV minus 1
+ const SCEV *ICMinus1SCEV =
+ SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+ Value *ICMinus1; // Iteration count minus 1
+ if (isa<SCEVConstant>(ICMinus1SCEV)) {
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, this);
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
- Preheader->getTerminator());
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+ Preheader->getTerminator());
}
- Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
- Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
"exitcond");
BI->setCondition(Cond);
if (BI->getSuccessor(1) != Header)
BI->swapSuccessors();
}
}
}
SimplifyInstructionsInBlock(Header, DL, TLI);
DeleteDeadPHIs(Header, TLI);
++NumRerolledLoops;
return true;
}
bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfo>();
DL = getAnalysisIfAvailable<DataLayout>();
DT = &getAnalysis<DominatorTree>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
"] Loop %" << Header->getName() << " (" <<
L->getNumBlocks() << " block(s))\n");
bool Changed = false;
// For now, we'll handle only single BB loops.
if (L->getNumBlocks() > 1)
return Changed;
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
return Changed;
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
const SCEV *IterCount =
SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
// First, we need to find the induction variable with respect to which we can
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
DEBUG(dbgs() << "LRR: No possible IVs found\n");
return Changed;
}
ReductionTracker Reductions;
collectPossibleReductions(L, Reductions);
// For each possible IV, collect the associated possible set of 'root' nodes
// (i+1, i+2, etc.).
for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
IE = PossibleIVs.end(); I != IE; ++I)
if (reroll(*I, L, Header, IterCount, Reductions)) {
Changed = true;
break;
}
return Changed;
}
Index: head/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (revision 265924)
+++ head/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (revision 265925)
@@ -1,4939 +1,4951 @@
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops
// that use the loop induction variable as one or more of their components; it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
// %i = phi [ 0, %entry ], [ %i.next, %latch ]
// ...
// %i.next = add %i, 1
// %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
// multiple base registers, or as the increment expression in an addrec),
// we may not actually need both reg and (-1 * reg) in registers; the
// negation can be implemented by using a sub instead of an add. The
// lack of support for taking this into consideration when making
// register pressure decisions is partly worked around by the "Special"
// use kind.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
// This is now needed for ivchains.
static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
"stress-ivchain", cl::Hidden, cl::init(false),
cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif
namespace {
/// RegSortData - This class holds data which is used to order reuse candidates.
class RegSortData {
public:
/// UsedByIndices - This represents the set of LSRUse indices which reference
/// a particular register.
SmallBitVector UsedByIndices;
RegSortData() {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
/// RegUseTracker - Map register candidates to information about how they are
/// used.
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
RegUsesTy RegUsesMap;
SmallVector<const SCEV *, 16> RegSequence;
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
void DropRegister(const SCEV *Reg, size_t LUIdx);
void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
void clear();
typedef SmallVectorImpl<const SCEV *>::iterator iterator;
typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
iterator begin() { return RegSequence.begin(); }
iterator end() { return RegSequence.end(); }
const_iterator begin() const { return RegSequence.begin(); }
const_iterator end() const { return RegSequence.end(); }
};
}
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
if (Pair.second)
RegSequence.push_back(Reg);
RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
RSD.UsedByIndices.set(LUIdx);
}
void
RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
RegUsesTy::iterator It = RegUsesMap.find(Reg);
assert(It != RegUsesMap.end());
RegSortData &RSD = It->second;
assert(RSD.UsedByIndices.size() > LUIdx);
RSD.UsedByIndices.reset(LUIdx);
}
void
RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
// Update RegUses. The data structure is not optimized for this purpose;
// we must iterate through it and update each of the bit vectors.
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
I != E; ++I) {
SmallBitVector &UsedByIndices = I->second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
}
}
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
if (I == RegUsesMap.end())
return false;
const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
return UsedByIndices.find_next(i) != -1;
}
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
assert(I != RegUsesMap.end() && "Unknown register!");
return I->second.UsedByIndices;
}
void RegUseTracker::clear() {
RegUsesMap.clear();
RegSequence.clear();
}
namespace {
/// Formula - This class holds information that describes a formula for
/// computing a value that satisfies a use. It may include broken-out
/// immediates and scaled registers.
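///
/// As an illustrative sketch (the value names here are hypothetical), an
/// address computed as
///   %base + 4 * {0,+,1}<%L> + 16
/// would be described by a Formula with BaseRegs = { %base }, ScaledReg =
/// {0,+,1}<%L>, Scale = 4, and BaseOffset = 16.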
struct Formula {
/// Global base address used for complex addressing.
GlobalValue *BaseGV;
/// Base offset for complex addressing.
int64_t BaseOffset;
/// Whether any complex addressing has a base register.
bool HasBaseReg;
/// The scale of any complex addressing.
int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
/// non-empty, HasBaseReg should be set to true.
SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
/// when Scale is not zero.
const SCEV *ScaledReg;
/// UnfoldedOffset - An additional constant offset which is added near the
/// use. This requires a temporary register, but the offset itself can
/// live in an add immediate field rather than a register.
int64_t UnfoldedOffset;
Formula()
: BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
ScalarEvolution &SE) {
// Collect expressions which properly dominate the loop header.
if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
// Look at add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
DoInitialMatch(*I, L, Good, Bad, SE);
return;
}
// Look at addrec operands.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
// FIXME: AR->getNoWrapFlags()
AR->getLoop(), SCEV::FlagAnyWrap),
L, Good, Bad, SE);
return;
}
// Handle a multiplication by -1 (negation) if it didn't fold.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
if (Mul->getOperand(0)->isAllOnesValue()) {
SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
const SCEV *NewMul = SE.getMulExpr(Ops);
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
E = MyGood.end(); I != E; ++I)
Good.push_back(SE.getMulExpr(NegOne, *I));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
E = MyBad.end(); I != E; ++I)
Bad.push_back(SE.getMulExpr(NegOne, *I));
return;
}
// Ok, we can't do anything interesting. Just stuff the whole thing into a
// register and hope for the best.
Bad.push_back(S);
}
/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
}
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
unsigned Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
0;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
void Formula::DeleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
/// referencesReg - Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
/// which are used by uses other than the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
return true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I)
if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
return true;
return false;
}
void Formula::print(raw_ostream &OS) const {
bool First = true;
if (BaseGV) {
if (!First) OS << " + "; else First = false;
WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
}
if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
OS << BaseOffset;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
} else if (!HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
if (Scale != 0) {
if (!First) OS << " + "; else First = false;
OS << Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
OS << "<unknown>";
OS << ')';
}
if (UnfoldedOffset != 0) {
if (!First) OS << " + "; else First = false;
OS << "imm(" << UnfoldedOffset << ')';
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::dump() const {
print(errs()); errs() << '\n';
}
#endif
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
/// isAddSExtable - Return true if the given add can be sign-extended
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
/// and if the remainder is known to be zero, or null otherwise. If
/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
/// to X, ignoring that the multiplication may overflow, which is useful when
/// the result will be used in a context where the most significant bits are
/// ignored.
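///
/// As an illustrative example (assuming the addrec is known to be
/// sign-extendable without changing value): dividing the addrec {8,+,4}<%L>
/// by the constant 2 yields {4,+,2}<%L>, while dividing it by 3 returns null,
/// because neither 8 nor 4 has a remainder of zero modulo 3.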
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {
// Handle the trivial case, which works for any SCEV type.
if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);
// Handle a few RHS special cases.
const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (RC) {
const APInt &RA = RC->getValue()->getValue();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
if (RA.isAllOnesValue())
return SE.getMulExpr(LHS, RC);
// Handle x /s 1 as x.
if (RA == 1)
return LHS;
}
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
return 0;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
return 0;
return SE.getConstant(LA.sdiv(RA));
}
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return 0;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
if (!Start) return 0;
// FlagNW is independent of the start value, step direction, and is
// preserved with smaller magnitude steps.
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
return 0;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
SmallVector<const SCEV *, 8> Ops;
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
if (!Op) return 0;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
return 0;
}
// Check for a multiply operand that we can pull RHS out of.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
I != E; ++I) {
const SCEV *S = *I;
if (!Found)
if (const SCEV *Q = getExactSDiv(S, RHS, SE,
IgnoreSignificantBits)) {
S = Q;
Found = true;
}
Ops.push_back(S);
}
return Found ? SE.getMulExpr(Ops) : 0;
}
return 0;
}
// Otherwise we don't know.
return 0;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
/// return that integer value, and mutate S to point to a new SCEV with that
/// value excluded.
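///
/// As an illustrative example (with a hypothetical value %a): given
/// S = (5 + %a), this returns 5 and leaves S pointing at %a, and given
/// S = {7,+,1}<%L>, it returns 7 and leaves S pointing at {0,+,1}<%L>.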
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getValue()->getValue().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
SCEV::FlagAnyWrap);
return Result;
}
return 0;
}
/// ExtractSymbol - If S involves the addition of a GlobalValue address,
/// return that symbol, and mutate S to point to a new SCEV with that
/// value excluded.
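///
/// As an illustrative example (with a hypothetical global @gv): given
/// S = (%a + @gv), this returns @gv and leaves S pointing at %a.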
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
S = SE.getConstant(GV->getType(), 0);
return GV;
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
if (Result)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
SCEV::FlagAnyWrap);
return Result;
}
return 0;
}
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(1) == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
}
}
return isAddress;
}
/// getAccessType - Return the type of the memory being accessed.
static Type *getAccessType(const Instruction *Inst) {
Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
AccessTy = II->getArgOperand(0)->getType();
break;
}
}
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace());
return AccessTy;
}
/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
return true;
}
return false;
}
/// Check if expanding this expression is likely to incur significant cost. This
/// is tricky because SCEV doesn't track which expressions are actually computed
/// by the current IR.
///
/// We currently allow expansion of IV increments that involve adds,
/// multiplication by constants, and AddRecs from existing phis.
///
/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
/// obvious multiple of the UDivExpr.
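///
/// As an illustrative example: (4 * %n) is considered cheap to expand, as is
/// an addrec such as {0,+,4}<%L> whose phi already exists in the loop, while
/// a UDivExpr such as (%n /u 4) is conservatively treated as high cost.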
static bool isHighCostExpansion(const SCEV *S,
SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution &SE) {
// Zero/One operand expressions
switch (S->getSCEVType()) {
case scUnknown:
case scConstant:
return false;
case scTruncate:
return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
Processed, SE);
case scZeroExtend:
return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
Processed, SE);
case scSignExtend:
return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
Processed, SE);
}
if (!Processed.insert(S))
return false;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
if (isHighCostExpansion(*I, Processed, SE))
return true;
}
return false;
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
if (Mul->getNumOperands() == 2) {
// Multiplication by a constant is ok
if (isa<SCEVConstant>(Mul->getOperand(0)))
return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
// If we have the value of one operand, check if an existing
// multiplication already generates this expression.
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
Value *UVal = U->getValue();
for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
UI != UE; ++UI) {
// If U is a constant, it may be used by a ConstantExpr.
Instruction *User = dyn_cast<Instruction>(*UI);
if (User && User->getOpcode() == Instruction::Mul
&& SE.isSCEVable(User->getType())) {
return SE.getSCEV(User) == Mul;
}
}
}
}
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (isExistingPhi(AR, SE))
return false;
}
// For now, consider any other type of expression (div/mul/min/max) high cost.
return true;
}
/// DeleteTriviallyDeadInstructions - If any of the instructions in the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = 0;
if (U->use_empty())
DeadInsts.push_back(U);
}
I->eraseFromParent();
Changed = true;
}
return Changed;
}
namespace {
class LSRUse;
}
// Check if it is legal to fold 2 base registers.
static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
namespace {
/// Cost - This class is used to measure and compare candidate formulae.
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
unsigned ScaleCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
void Loose();
#ifndef NDEBUG
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
| ImmCost | SetupCost | ScaleCost) != ~0u)
|| ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
& ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
bool isLoser() {
assert(isValid() && "invalid cost");
return NumRegs == ~0u;
}
void RateFormula(const TargetTransformInfo &TTI,
const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
private:
void RateRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
/// RateRegister - Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
// If this is an addrec for another loop, don't second-guess its addrec phi
// nodes. LSR isn't currently smart enough to reason about more than one
// loop at a time. LSR has already run on inner loops, will not run on outer
// loops, and cannot be expected to change sibling loops.
if (AR->getLoop() != L) {
// If the AddRec exists, consider its register free and leave it alone.
if (isExistingPhi(AR, SE))
return;
// Otherwise, do not consider this formula at all.
Loose();
return;
}
AddRecCost += 1; /// TODO: This should be a function of the stride.
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
if (isLoser())
return;
}
}
}
++NumRegs;
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
if (!isa<SCEVUnknown>(Reg) &&
!isa<SCEVConstant>(Reg) &&
!(isa<SCEVAddRecExpr>(Reg) &&
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE.hasComputableLoopEvolution(Reg, L);
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
/// before, rate it. Optional LoserRegs provides a way to declare any formula
/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
Loose();
return;
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
void Cost::RateFormula(const TargetTransformInfo &TTI,
const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (VisitedRegs.count(BaseReg)) {
Loose();
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows two registers to be folded into the addressing mode.
NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
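// E.g. with three base parts on a target that can fold two registers into
// the addressing mode, 3 - (1 + 1) = 1 add remains inside the loop.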
// Accumulate non-free scaling amounts.
ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
int64_t Offset = (uint64_t)*I + F.BaseOffset;
if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
assert(isValid() && "invalid cost");
}
/// Loose - Set this cost to a losing value.
void Cost::Loose() {
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
ScaleCost = ~0u;
}
/// operator< - Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
if (NumRegs != Other.NumRegs)
return NumRegs < Other.NumRegs;
if (AddRecCost != Other.AddRecCost)
return AddRecCost < Other.AddRecCost;
if (NumIVMuls != Other.NumIVMuls)
return NumIVMuls < Other.NumIVMuls;
if (NumBaseAdds != Other.NumBaseAdds)
return NumBaseAdds < Other.NumBaseAdds;
if (ScaleCost != Other.ScaleCost)
return ScaleCost < Other.ScaleCost;
if (ImmCost != Other.ImmCost)
return ImmCost < Other.ImmCost;
if (SetupCost != Other.SetupCost)
return SetupCost < Other.SetupCost;
return false;
}
void Cost::print(raw_ostream &OS) const {
OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
if (AddRecCost != 0)
OS << ", with addrec cost " << AddRecCost;
if (NumIVMuls != 0)
OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
if (ScaleCost != 0)
OS << ", plus " << ScaleCost << " scale cost";
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
OS << ", plus " << SetupCost << " setup cost";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
/// LSRFixup - An operand value in an instruction which is to be replaced
/// with some equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
/// UserInst - The instruction which will be updated.
Instruction *UserInst;
/// OperandValToReplace - The operand of the instruction which will
/// be replaced. The operand may be used more than once; every instance
/// will be replaced.
Value *OperandValToReplace;
/// PostIncLoops - If this user is to use the post-incremented value of an
/// induction variable, this variable is non-null and holds the loop
/// associated with the induction variable.
PostIncLoopSet PostIncLoops;
/// LUIdx - The index of the LSRUse describing the expression which
/// this fixup needs, minus an offset (below).
size_t LUIdx;
/// Offset - A constant offset to be added to the LSRUse expression.
/// This allows multiple fixups to share the same LSRUse with different
/// offsets, for example in an unrolled loop.
int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
LSRFixup();
void print(raw_ostream &OS) const;
void dump() const;
};
}
LSRFixup::LSRFixup()
: UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == OperandValToReplace &&
L->contains(PN->getIncomingBlock(i)))
return false;
return true;
}
return !L->contains(UserInst);
}
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
// Store is common and interesting enough to be worth special-casing.
if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
OS << "store ";
WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
} else if (UserInst->getType()->isVoidTy())
OS << UserInst->getOpcodeName();
else
WriteAsOperand(OS, UserInst, /*PrintType=*/false);
OS << ", OperandValToReplace=";
WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
OS << ", LUIdx=" << LUIdx;
if (Offset != 0)
OS << ", Offset=" << Offset;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
static SmallVector<const SCEV *, 4> getEmptyKey() {
SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
static SmallVector<const SCEV *, 4> getTombstoneKey() {
SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
return Result;
}
static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information about
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
/// scaled and immediate operands it might support.
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
Address, ///< An address use; folding according to TargetLowering
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
KindType Kind;
Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
/// AllFixupsOutsideLoop - This records whether all of the fixups using this
/// LSRUse are outside of the loop, in which case some special-case heuristics
/// may be used.
bool AllFixupsOutsideLoop;
/// RigidFormula is set to true to guarantee that this use will be associated
/// with a single formula--the one that initially matched. Some SCEV
/// expressions cannot be expanded. This allows LSR to consider the registers
/// used by those expressions without the need to expand them later after
/// changing the formula.
bool RigidFormula;
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
/// on the implicit truncation to truncate away bogus bits.
Type *WidestFixupType;
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// used to formulate a replacement for OperandValToReplace in UserInst.
SmallVector<Formula, 12> Formulae;
/// Regs - The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses);
void print(raw_ostream &OS) const;
void dump() const;
};
}
/// HasFormulaWithSameRegs - Test whether this use has a formula with the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
return Uniquifier.count(Key);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
if (!Formulae.empty() && RigidFormula)
return false;
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
if (!Uniquifier.insert(Key).second)
return false;
// Using a register to hold the value of 0 is not profitable.
assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
"Zero allocated in a scaled register!");
#ifndef NDEBUG
for (SmallVectorImpl<const SCEV *>::const_iterator I =
F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
assert(!(*I)->isZero() && "Zero allocated in a base register!");
#endif
// Add the formula to the list.
Formulae.push_back(F);
// Record registers now being used by this use.
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
}
/// DeleteFormula - Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
Regs.clear();
for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
E = Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
// Update the RegTracker.
for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
E = OldRegs.end(); I != E; ++I)
if (!Regs.count(*I))
RegUses.DropRegister(*I, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
OS << "LSR Use: Kind=";
switch (Kind) {
case Basic: OS << "Basic"; break;
case Special: OS << "Special"; break;
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
if (AccessTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
else
OS << *AccessTy;
}
OS << ", Offsets={";
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
if (llvm::next(I) != E)
OS << ',';
}
OS << '}';
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
#endif
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
if (BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
if (Scale != 0 && Scale != -1)
return false;
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (BaseOffset != 0) {
// We have one of:
// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
// Offs is the ICmp immediate.
if (Scale == 0)
// The cast does the right thing with INT64_MIN.
BaseOffset = -(uint64_t)BaseOffset;
return TTI.isLegalICmpImmediate(BaseOffset);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
return true;
case LSRUse::Basic:
// Only handle single-register values.
return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
// Special case Basic to handle -1 scales.
return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
int64_t Scale) {
// Check for overflow.
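// (Adding a positive MinOffset must move the value up and a negative one
// down; if the observed direction disagrees with the sign, the addition
// wrapped. E.g. BaseOffset = INT64_MAX with MinOffset = 1 wraps around and
// is rejected.)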
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
MinOffset = (uint64_t)BaseOffset + MinOffset;
if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
(MaxOffset > 0))
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
Scale) &&
isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
const Formula &F) {
// If F is used as an Addressing Mode, it may fold one Base plus one
// scaled register. If the scaled register is nil, behave as if another
// element of the base regs were a 1-scaled register.
// This is possible if BaseRegs has at least 2 registers.
// If this is not an address calculation, this is not an addressing mode
// use.
if (LU.Kind != LSRUse::Address)
return false;
// F is already scaled.
if (F.Scale != 0)
return false;
// We need to keep one register for the base and one to scale.
if (F.BaseRegs.size() < 2)
return false;
return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F) && "Illegal formula in use.");
switch (LU.Kind) {
case LSRUse::Address: {
// Check the scaling factor cost with both the min and max offsets.
int ScaleCostMinOffset =
TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
F.BaseOffset + LU.MinOffset,
F.HasBaseReg, F.Scale);
int ScaleCostMaxOffset =
TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
F.BaseOffset + LU.MaxOffset,
F.HasBaseReg, F.Scale);
assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
"Legal addressing mode has an illegal cost!");
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
// Therefore, return 0 in case F.Scale == -1.
return F.Scale != -1;
case LSRUse::Basic:
case LSRUse::Special:
return 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
if (!HasBaseReg && Scale == 1) {
Scale = 0;
HasBaseReg = true;
}
return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
Type *AccessTy, const SCEV *S, bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale);
}
namespace {
/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
struct UseMapDenseMapInfo {
static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
}
static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
}
static unsigned
getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
return Result;
}
static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
return LHS == RHS;
}
};
/// IVInc - An individual increment in a Chain of IV increments.
/// Relate an IV user to an expression that computes the IV it uses from the IV
/// used by the previous link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
/// chain collection, before LSR replaces IV users. During chain generation,
/// IncExpr can be used to find the new IVOperand that computes the same
/// expression.
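///
/// As an illustrative sketch (hypothetical IR names): in a chain built from
///   %i1 = add i64 %i, 1
///   %i2 = add i64 %i1, 1
/// the link for a user of %i2 records IVOperand = %i2 and an IncExpr of 1,
/// the step from the previous link's operand %i1.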
struct IVInc {
Instruction *UserInst;
Value* IVOperand;
const SCEV *IncExpr;
IVInc(Instruction *U, Value *O, const SCEV *E):
UserInst(U), IVOperand(O), IncExpr(E) {}
};
// IVChain - The list of IV increments in program order.
// We typically add the head of a chain without finding subsequent links.
struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
IVChain() : ExprBase(0) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
// begin - return the first increment in the chain.
const_iterator begin() const {
assert(!Incs.empty());
return llvm::next(Incs.begin());
}
const_iterator end() const {
return Incs.end();
}
// hasIncs - Returns true if this chain contains any increments.
bool hasIncs() const { return Incs.size() >= 2; }
// add - Add an IVInc to the end of this chain.
void add(const IVInc &X) { Incs.push_back(X); }
// tailUserInst - Returns the last UserInst in the chain.
Instruction *tailUserInst() const { return Incs.back().UserInst; }
// isProfitableIncrement - Returns true if IncExpr can be profitably added to
// this chain.
bool isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution&);
};
/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
/// Distinguish between FarUsers that definitely cross IV increments and
/// NearUsers that may be used between IV increments.
struct ChainUsers {
SmallPtrSet<Instruction*, 4> FarUsers;
SmallPtrSet<Instruction*, 4> NearUsers;
};
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
/// IVIncInsertPos - This is the insert position at which the current loop's
/// induction variable increment should be placed. In simple loops, this is
/// the latch block's terminator. But in more complicated cases, this is a
/// position which will dominate all the in-loop post-increment users.
Instruction *IVIncInsertPos;
/// Factors - Interesting factors between use strides.
SmallSetVector<int64_t, 8> Factors;
/// Types - Interesting use types, to facilitate truncation reuse.
SmallSetVector<Type *, 4> Types;
/// Fixups - The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
/// Uses - The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
/// RegUses - Track which uses use which register candidates.
RegUseTracker RegUses;
// Limit the number of chains to avoid quadratic behavior. We don't expect to
// have more than a few IV increment chains in a loop. Missing a Chain falls
// back to normal LSR behavior for those uses.
static const unsigned MaxChains = 8;
/// IVChainVec - IV users can form a chain of IV increments.
SmallVector<IVChain, MaxChains> IVChainVec;
/// IVIncSet - IV users that belong to profitable IVChains.
SmallPtrSet<Use*, MaxChains> IVIncSet;
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec);
void FinalizeChain(IVChain &Chain);
void CollectChains();
void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts);
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
LSRFixup &getNewFixup() {
Fixups.push_back(LSRFixup());
return Fixups.back();
}
// Support for sharing of LSRUses between LSRFixups.
typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
size_t,
UseMapDenseMapInfo> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
Type *AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
void CollectLoopInvariantFixupsAndFormulae();
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateCrossUseConstantOffsets();
void GenerateAllReuseFormulae();
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
BasicBlock::iterator
AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU,
SCEVExpander &Rewriter) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
public:
LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
void print_factors_and_types(raw_ostream &OS) const;
void print_fixups(raw_ostream &OS) const;
void print_uses(raw_ostream &OS) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop then try to eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
UI != E; /* empty */) {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
Type *DestTy = 0;
bool IsSigned = false;
/* If shadow use is an int->float cast, then insert a second IV
to eliminate this cast.
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
IsSigned = false;
DestTy = UCast->getDestTy();
}
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
IsSigned = true;
DestTy = SCast->getDestTy();
}
if (!DestTy) continue;
// If target does not support DestTy natively then do not apply
// this transformation.
if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
(double)Init->getSExtValue() :
(double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = 0;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
// Ignore negative constants, as the code below doesn't handle them
// correctly. TODO: Remove this restriction.
if (!C->getValue().isStrictlyPositive()) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
Changed = true;
break;
}
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// since InstCombine already handles the simple cases, it's not clear that
// this occurs often enough in real life to be worth handling.
CondUse = UI;
return true;
}
return false;
}
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting loops of this form and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern. There's no check
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEVNAryExpr *Max = 0;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_SLT;
Max = S;
} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_ULT;
Max = U;
} else {
// No match; bail.
return Cond;
}
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
if (Max->getNumOperands() != 2)
return Cond;
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
// ScalarEvolution canonicalizes constants to the left. For < and >, look
// for a comparison with 1. For <= and >=, a comparison with zero.
if (!MaxLHS ||
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(SE) != One)
return Cond;
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
NewRHS = SU->getValue();
else
// Max doesn't match expected pattern.
return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
CondUse->setUser(NewCond);
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Cond->eraseFromParent();
Sel->eraseFromParent();
if (Cmp->use_empty())
Cmp->eraseFromParent();
return NewCond;
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
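///
/// For illustration only (hypothetical IR; the rewritten bound is written
/// as %n.adj and glossed over here): comparing the pre-incremented value
/// keeps two values of the IV live across the increment,
///
///   %cmp = icmp ne i64 %iv, %n
///   %iv.next = add i64 %iv, 1
///
/// whereas comparing the post-incremented value lets the IV live ranges
/// coalesce into one register:
///
///   %iv.next = add i64 %iv, 1
///   %cmp = icmp ne i64 %iv.next, %n.adj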
void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
BasicBlock *LatchBlock = L->getLoopLatch();
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
// Get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
continue;
// FIXME: Overly conservative; the termination condition could be an 'or' etc.
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
// If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT or ULT instead of NE.
// One consequence of doing this now is that it disrupts the count-down
// optimization. That's not always a bad thing though, because in such
// cases it may still be worthwhile to avoid a max.
Cond = OptimizeMax(Cond, CondUse);
// If this exiting block dominates the latch block, it may also use
// the post-inc value if it won't be shared with other uses.
// Check for dominance.
if (!DT.dominates(ExitingBlock, LatchBlock))
continue;
// Conservatively avoid trying to use the post-inc value in non-latch
// exits if there may be pre-inc users in intervening blocks.
if (LatchBlock != ExitingBlock)
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
// Test if the use is reachable from the exiting block. This dominator
// query is a conservative approximation of reachability.
if (&*UI != CondUse &&
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale.
const SCEV *A = IU.getStride(*CondUse, L);
const SCEV *B = IU.getStride(*UI, L);
if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
SE.getTypeSizeInBits(B->getType()))
B = SE.getSignExtendExpr(B, A->getType());
else
A = SE.getSignExtendExpr(A, B->getType());
}
if (const SCEVConstant *D =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
const ConstantInt *C = D->getValue();
// Stride of one or negative one can have reuse with non-addresses.
if (C->isOne() || C->isAllOnesValue())
goto decline_post_inc;
// Avoid weird situations.
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
Scale = -Scale;
if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
}
}
DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
<< *Cond << '\n');
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the exiting block branch, move it.
if (&*++BasicBlock::iterator(Cond) != TermBr) {
if (Cond->hasOneUse()) {
Cond->moveBefore(TermBr);
} else {
// Clone the terminating condition and insert it just before the branch.
ICmpInst *OldCond = Cond;
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
CondUse->transformToPostInc(L);
Changed = true;
PostIncs.insert(Cond);
decline_post_inc:;
}
// Determine an insertion point for the loop induction variable increment. It
// must dominate all the post-inc comparisons we just set up, and it must
// dominate the loop latch edge.
IVIncInsertPos = L->getLoopLatch()->getTerminator();
for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
E = PostIncs.end(); I != E; ++I) {
BasicBlock *BB =
DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
(*I)->getParent());
if (BB == (*I)->getParent())
IVIncInsertPos = *I;
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
}
/// reconcileNewOffset - Determine if the given use can accommodate a fixup
/// at the given offset and other details. If so, update the use and
/// return true.
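/// For example (illustrative numbers): if the use currently spans offsets
/// [-8, 16] and a new fixup needs offset 24, it is absorbed only if the
/// worst-case distance 24 - (-8) = 32 is still always foldable for this
/// kind and access type, in which case MaxOffset becomes 24.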
bool
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
Type *NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
// Check for a mismatched access type, and fall back conservatively as needed.
// TODO: Be less conservative when the type is similar and can use the same
// addressing modes.
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
LU.MaxOffset = NewMaxOffset;
LU.AccessTy = NewAccessTy;
if (NewOffset != LU.Offsets.back())
LU.Offsets.push_back(NewOffset);
return true;
}
/// getUse - Return an LSRUse index and an offset value for a fixup which
/// needs the given expression, with the given kind and optional access type.
/// Either reuse an existing use or create a new one, as needed.
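/// For example (illustrative): given Expr = {(%base + 16),+,4},
/// ExtractImmediate peels off the 16, keying the use on {%base,+,4} with
/// Offset 16; if an offset of 16 is not always foldable for this kind, the
/// original expression is restored and Offset stays 0.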
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
LSRUse::KindType Kind, Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
std::pair<UseMapTy::iterator, bool> P =
UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
LSRUse &LU = Uses[LUIdx];
if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
// Reuse this use.
return std::make_pair(LUIdx, Offset);
}
// Create a new use.
size_t LUIdx = Uses.size();
P.first->second = LUIdx;
Uses.push_back(LSRUse(Kind, AccessTy));
LSRUse &LU = Uses[LUIdx];
// We don't need to track redundant offsets, but we don't need to go out
// of our way here to avoid them.
if (LU.Offsets.empty() || Offset != LU.Offsets.back())
LU.Offsets.push_back(Offset);
LU.MinOffset = Offset;
LU.MaxOffset = Offset;
return std::make_pair(LUIdx, Offset);
}
/// DeleteUse - Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
// Update RegUses.
RegUses.SwapAndDropUse(LUIdx, Uses.size());
}
/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
/// a formula that has the same registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
// Search all uses for the formula. This could be more clever.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
// Check whether this use is close enough to OrigLU, to see whether it's
// worthwhile looking through its formulae.
// Ignore ICmpZero uses because they may contain formulae generated by
// GenerateICmpZeroScales, in which case adding fixup offsets may
// be invalid.
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
// Scan through this use's formulae.
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Check to see if this formula has the same registers and symbols
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.BaseGV == OrigF.BaseGV &&
F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
if (F.BaseOffset == 0)
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
// can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
}
}
// Nothing looked good.
return 0;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
// Collect interesting types and strides.
SmallVector<const SCEV *, 4> Worklist;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Expr = IU.getExpr(*UI);
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
// Add strides for mentioned loops.
Worklist.push_back(Expr);
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (AR->getLoop() == L)
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
}
// Compute interesting factors from the set of interesting strides.
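// For example (illustrative): strides 4 and 8 yield the factor 2, because
// 8 divides exactly by 4; the inexact 4/8 direction contributes nothing.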
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
if (SE.getTypeSizeInBits(OldStride->getType()) >
SE.getTypeSizeInBits(NewStride->getType()))
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
else
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
if (Types.size() == 1)
Types.clear();
DEBUG(print_factors_and_types(dbgs()));
}
/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
/// Instructions to IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
Loop *L, ScalarEvolution &SE) {
for (; OI != OE; ++OI) {
if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
if (!SE.isSCEVable(Oper->getType()))
continue;
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
if (AR->getLoop() == L)
break;
}
}
}
return OI;
}
/// getWideOperand - IVChain logic must consistently peek through base TruncInst
/// operands, so wrap it in a convenient helper.
static Value *getWideOperand(Value *Oper) {
if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
return Trunc->getOperand(0);
return Oper;
}
/// isCompatibleIVType - Return true if we allow an IV chain to include both
/// types.
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
}
/// getExprBase - Return an approximation of this SCEV expression's "base", or
/// NULL for any constant. Returning the expression itself is
/// conservative. Returning a deeper subexpression is more precise and valid as
/// long as it isn't less complex than another subexpression. For expressions
/// involving multiple unscaled values, we need to return the pointer-type
/// SCEVUnknown. This avoids forming chains across objects, such as:
/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) {
switch (S->getSCEVType()) {
default: // including scUnknown.
return S;
case scConstant:
return 0;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
case scSignExtend:
return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
case scAddExpr: {
// Skip over scaled operands (scMulExpr) to follow add operands as long as
// there's nothing more complex.
// FIXME: not sure if we want to recognize negation.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
E(Add->op_begin()); I != E; ++I) {
const SCEV *SubExpr = *I;
if (SubExpr->getSCEVType() == scAddExpr)
return getExprBase(SubExpr);
if (SubExpr->getSCEVType() != scMulExpr)
return SubExpr;
}
return S; // all operands are scaled, be conservative.
}
case scAddRecExpr:
return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
}
}
/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as the chain increment as long as it's not obviously
/// expensive to expand using real instructions.
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution &SE) {
// Aggressively form chains when -stress-ivchain.
if (StressIVChain)
return true;
// Do not replace a constant offset from IV head with a nonconstant IV
// increment.
if (!isa<SCEVConstant>(IncExpr)) {
const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
return false;
}
SmallPtrSet<const SCEV*, 8> Processed;
return !isHighCostExpansion(IncExpr, Processed, SE);
}
/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in other chains.
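///
/// Worked example (illustrative): a chain ending in the header phi with
/// three constant increments starts at cost 1 for the chain's own register,
/// drops to 0 for completing the phi, and to -1 because multiple constant
/// increments fold into addressing modes; -1 < 0, so the chain is kept.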
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
if (!Chain.hasIncs())
return false;
if (!Users.empty()) {
DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
E = Users.end(); I != E; ++I) {
dbgs() << " " << **I << "\n";
});
return false;
}
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
// The chain itself may require a register, so initialize cost to 1.
int cost = 1;
// A complete chain likely eliminates the need for keeping the original IV in
// a register. LSR does not currently know how to form a complete chain unless
// the header phi already exists.
if (isa<PHINode>(Chain.tailUserInst())
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
const SCEV *LastIncExpr = 0;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
if (I->IncExpr->isZero())
continue;
// Incrementing by zero or some constant is neutral. We assume constants can
// be folded into an addressing mode or an add's immediate operand.
if (isa<SCEVConstant>(I->IncExpr)) {
++NumConstIncrements;
continue;
}
if (I->IncExpr == LastIncExpr)
++NumReusedIncrements;
else
++NumVarIncrements;
LastIncExpr = I->IncExpr;
}
// An IV chain with a single increment is handled by LSR's postinc
// uses. However, a chain with multiple increments requires keeping the IV's
// value live longer than it needs to be if chained.
if (NumConstIncrements > 1)
--cost;
// Materializing increment expressions in the preheader that didn't exist in
// the original code may cost a register. For example, sign-extended array
// indices can produce ridiculous increments like this:
// IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
cost += NumVarIncrements;
// Reusing variable increments likely saves a register to hold the multiple of
// the stride.
cost -= NumReusedIncrements;
DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
<< "\n");
return cost < 0;
}
/// ChainInstruction - Add this IV user to an existing chain or make it the head
/// of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// When IVs are used as types of varying widths, they are generally converted
// to a wider type with some uses remaining narrow under a (free) trunc.
Value *const NextIV = getWideOperand(IVOper);
const SCEV *const OperExpr = SE.getSCEV(NextIV);
const SCEV *const OperExprBase = getExprBase(OperExpr);
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
const SCEV *LastIncExpr = 0;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
// Prune the solution space aggressively by checking that both IV operands
// are expressions that operate on the same unscaled SCEVUnknown. This
// "base" will be canceled by the subsequent getMinusSCEV call. Checking
// first avoids creating extra SCEV expressions.
if (!StressIVChain && Chain.ExprBase != OperExprBase)
continue;
Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
if (!isCompatibleIVType(PrevIV, NextIV))
continue;
// A phi node terminates a chain.
if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
continue;
// The increment must be loop-invariant so it can be kept in a register.
const SCEV *PrevExpr = SE.getSCEV(PrevIV);
const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
if (!SE.isLoopInvariant(IncExpr, L))
continue;
if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
LastIncExpr = IncExpr;
break;
}
}
// If we haven't found a chain, create a new one, unless we hit the max. Don't
// bother for phi nodes, because they must be last in the chain.
if (ChainIdx == NChains) {
if (isa<PHINode>(UserInst))
return;
if (NChains >= MaxChains && !StressIVChain) {
DEBUG(dbgs() << "IV Chain Limit\n");
return;
}
LastIncExpr = OperExpr;
// IVUsers may have skipped over sign/zero extensions. We don't currently
// attempt to form chains involving extensions unless they can be hoisted
// into this loop's AddRec.
if (!isa<SCEVAddRecExpr>(LastIncExpr))
return;
++NChains;
IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
OperExprBase));
ChainUsersVec.resize(NChains);
DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
<< ") IV=" << *LastIncExpr << "\n");
} else {
DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
<< ") IV+" << *LastIncExpr << "\n");
// Add this IV user to the end of the chain.
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
if (!LastIncExpr->isZero()) {
ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
NearUsers.end());
NearUsers.clear();
}
// All other uses of IVOperand become near uses of the chain.
// We currently ignore intermediate values within SCEV expressions, assuming
// they will eventually be used by the current chain, or can be computed
// from one of the chain increments. To be more precise we could
// transitively follow its user and only add leaf IV users to the set.
for (Value::use_iterator UseIter = IVOper->use_begin(),
UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
if (!OtherUse)
continue;
// Uses in the chain will no longer be uses if the chain is formed.
// Include the head of the chain in this iteration (not Chain.begin()).
IVChain::const_iterator IncIter = Chain.Incs.begin();
IVChain::const_iterator IncEnd = Chain.Incs.end();
for (; IncIter != IncEnd; ++IncIter) {
if (IncIter->UserInst == OtherUse)
break;
}
if (IncIter != IncEnd)
continue;
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
continue;
}
NearUsers.insert(OtherUse);
}
// Since this user is part of the chain, it's no longer considered a use
// of the chain.
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
/// CollectChains - Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
/// this. However, such targets should probably disable LSR altogether.
///
/// The job of LSR is to make a reasonable choice of induction variables across
/// the loop. Subsequent passes can easily "unchain" computation exposing more
/// ILP *within the loop* if the target wants it.
///
/// Finding the best IV chain is potentially a scheduling problem. Since LSR
/// will not reorder memory operations, it will recognize this as a chain, but
/// will generate redundant IV increments. Ideally this would be corrected later
/// by a smart scheduler:
/// = A[i]
/// = A[i+x]
/// A[i] =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
DEBUG(dbgs() << "Collecting IV Chains.\n");
SmallVector<ChainUsers, 8> ChainUsersVec;
SmallVector<BasicBlock *,8> LatchPath;
BasicBlock *LoopHeader = L->getHeader();
for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
LatchPath.push_back(Rung->getBlock());
}
LatchPath.push_back(LoopHeader);
// Walk the instruction stream from the loop header to the loop latch.
for (SmallVectorImpl<BasicBlock *>::reverse_iterator
BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
BBIter != BBEnd; ++BBIter) {
for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
I != E; ++I) {
// Skip instructions that weren't seen by IVUsers analysis.
if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
continue;
// Ignore users that are part of a SCEV expression. This way we only
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
ChainUsersVec[ChainIdx].NearUsers.erase(I);
}
// Search for operands that can be chained.
SmallPtrSet<Instruction*, 4> UniqueOperands;
User::op_iterator IVOpEnd = I->op_end();
User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst))
ChainInstruction(I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
} // Continue walking down the domtree.
// Visit phi backedges to determine if the chain can generate the IV postinc.
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (!SE.isSCEVable(PN->getType()))
continue;
Instruction *IncV =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)
ChainInstruction(PN, IncV, ChainUsersVec);
}
// Remove any unprofitable chains.
unsigned ChainIdx = 0;
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
// Preserve the chain at UsersIdx.
if (ChainIdx != UsersIdx)
IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
FinalizeChain(IVChainVec[ChainIdx]);
++ChainIdx;
}
IVChainVec.resize(ChainIdx);
}
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
User::op_iterator UseI =
std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
IVIncSet.insert(UseI);
}
}
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
getAccessType(UserInst), /*BaseGV=*/ 0,
IncOffset, /*HasBaseReg=*/ false))
return false;
return true;
}
/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
/// materialize the IV user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
// If this operand computes the expression that the chain needs, we may use
// it. (Check this after setting IVSrc which is used below.)
//
// Note that if Head.IncExpr is wider than IVSrc, then this phi is too
// narrow for the chain, so we can no longer use it. We do allow using a
// wider phi, assuming the LSR checked for free truncation. In that case we
// should already have a truncate on this operand such that
// getSCEV(IVSrc) == IncExpr.
if (SE.getSCEV(*IVOpIter) == Head.IncExpr
|| SE.getSCEV(IVSrc) == Head.IncExpr) {
break;
}
IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
}
if (IVOpIter == IVOpEnd) {
// Gracefully give up on this chain.
DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
return;
}
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = 0;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
Instruction *InsertPt = IncI->UserInst;
if (isa<PHINode>(InsertPt))
InsertPt = L->getLoopLatch()->getTerminator();
// IVOper will replace the current IV User's operand. IVSrc is the IV
// value currently held in a register.
Value *IVOper = IVSrc;
if (!IncI->IncExpr->isZero()) {
// IncExpr was the result of subtraction of two narrow values, so must
// be signed.
const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
LeftOverExpr = LeftOverExpr ?
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
}
if (LeftOverExpr && !LeftOverExpr->isZero()) {
// Expand the IV increment.
Rewriter.clearPostInc();
Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
SE.getUnknown(IncV));
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
}
}
Type *OperTy = IncI->IVOperand->getType();
if (IVTy != OperTy) {
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
"cannot extend a chained IV");
IRBuilder<> Builder(InsertPt);
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
}
IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
DeadInsts.push_back(IncI->IVOperand);
}
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
if (isa<PHINode>(Chain.tailUserInst())) {
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
if (!isCompatibleIVType(Phi, IVSrc))
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
Phi->getIncomingValueForBlock(L->getLoopLatch()));
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
continue;
Value *IVOper = IVSrc;
Type *PostIncTy = PostIncV->getType();
if (IVTy != PostIncTy) {
assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
}
Phi->replaceUsesOfWith(PostIncV, IVOper);
DeadInsts.push_back(PostIncV);
}
}
}
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
Instruction *UserInst = UI->getUser();
// Skip IV users that are part of profitable IV Chains.
User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
UI->getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI))
continue;
// Record the uses.
LSRFixup &LF = getNewFixup();
LF.UserInst = UserInst;
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
}
const SCEV *S = IU.getExpr(*UI);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
// with rather than just N or i, so we can consider the register
// requirements for both N and i at the same time. Limiting this code to
// equality icmps is not a problem because all interesting loops use
// equality icmps, thanks to IndVarSimplify.
if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
if (CI->isEquality()) {
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
if (NV == LF.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
NV = CI->getOperand(1);
Changed = true;
}
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
// -1 and the negations of all interesting strides (except the negation
// of -1) are now also interesting.
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
// Set up the initial formula for this use.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
InsertInitialFormula(S, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
DEBUG(print_fixups(dbgs()));
}
/// InsertInitialFormula - Insert a formula for the given expression into
/// the given use, separating out loop-variant portions from loop-invariant
/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
if (!isSafeToExpand(S, SE))
LU.RigidFormula = true;
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
/// InsertSupplementalFormula - Insert a simple single-register formula for
/// the given expression into the given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I)
RegUses.CountRegister(*I, LUIdx);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
if (!LU.InsertFormula(F))
return false;
CountRegisters(F, LUIdx);
return true;
}
/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
/// loop-invariant values which we're tracking. These other uses will pin these
/// values in registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 8> Inserted;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!Inserted.insert(U)) continue;
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
// Undef doesn't have a live range, so it doesn't matter.
continue;
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
// Ignore non-instructions.
if (!UserInst)
continue;
// Ignore instructions in other functions (as can happen with
// Constants).
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
// Ignore instructions not dominated by the loop.
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
// If the user is a no-op, look through to its uses.
if (!isa<SCEVUnknown>(UserS))
continue;
if (UserS == U) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
}
}
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = UI.getUse();
std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
}
}
}
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
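/// For example (illustrative): S = ((4 * %a) + %b + {0,+,8}) yields
/// Ops = { (4 * %a), %b, {0,+,8} } and a null remainder; the addrec is kept
/// whole because its start is zero.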
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
const Loop *L,
ScalarEvolution &SE,
unsigned Depth = 0) {
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3)
return S;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
return 0;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
return S;
const SCEV *Remainder = CollectSubexprs(AR->getStart(),
C, Ops, L, SE, Depth+1);
// Split the non-zero AddRec unless it is part of a nested recurrence that
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
Remainder = 0;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
Remainder = SE.getConstant(AR->getType(), 0);
return SE.getAddRecExpr(Remainder,
AR->getStepRecurrence(SE),
AR->getLoop(),
//FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
if (Mul->getNumOperands() != 2)
return S;
if (const SCEVConstant *Op0 =
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
const SCEV *Remainder =
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
return 0;
}
}
return S;
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
/// addrecs.
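/// For example (illustrative): a formula whose only base register is
/// (%a + %b + {0,+,4}) spawns variants in which %a (and, in turn, each
/// other operand) becomes its own base register while the rest stay
/// summed, each new formula being recursively re-split.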
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base,
unsigned Depth) {
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
if (Remainder)
AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
// Loop-variant "unknown" values are uninteresting; we won't be able to
// do anything meaningful with them.
if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
(((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
(llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
if (InnerSum->isZero())
continue;
Formula F = Base;
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
if (InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
} else
F.BaseRegs[i] = InnerSum;
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
F.BaseRegs.push_back(*J);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
// it.
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
}
}
}
/// GenerateCombinations - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
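/// For example (illustrative): base registers { %inv1, %inv2, {0,+,4} },
/// with %inv1 and %inv2 loop-invariant, also produce the variant
/// { (%inv1 + %inv2), {0,+,4} }, spending one register on the combined
/// invariant part.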
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
if (Base.BaseRegs.size() <= 1) return;
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
!SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
}
if (Ops.size() > 1) {
const SCEV *Sum = SE.getAddExpr(Ops);
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
(void)InsertFormula(LU, LUIdx, F);
}
}
}
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
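/// For example (illustrative): a base register (@glob + {0,+,4}) can become
/// BaseGV = @glob with base register {0,+,4}, provided the target can fold
/// the symbol into its addressing mode.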
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
GlobalValue *GV = ExtractSymbol(G, SE);
if (G->isZero() || !GV)
continue;
Formula F = Base;
F.BaseGV = GV;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
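/// For example (illustrative): with LU.MinOffset = 16 and base register
/// {%base,+,4}, one candidate folds the 16 into the register, producing
/// {(%base + 16),+,4} with the formula's BaseOffset reduced by 16.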
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// TODO: For now, just add the min and max offset, because it usually isn't
// worthwhile looking at everything in between.
SmallVector<int64_t, 2> Worklist;
Worklist.push_back(LU.MinOffset);
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
LU.AccessTy, F)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
std::swap(F.BaseRegs[i], F.BaseRegs.back());
F.BaseRegs.pop_back();
} else
F.BaseRegs[i] = NewG;
(void)InsertFormula(LU, LUIdx, F);
}
}
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
// Determine the integer type for the base formula.
Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
// Check that the multiplication doesn't overflow.
if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
+ // If the offset will be truncated at this use, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
+ continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
if (Offset == INT64_MIN && Factor == -1)
continue;
Offset = (uint64_t)Offset * Factor;
if (Offset / Factor != LU.MinOffset)
continue;
+ // If the offset will be truncated at this use, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, Offset))
+ continue;
Formula F = Base;
F.BaseOffset = NewBaseOffset;
// Check that this scale is legal.
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Compensate for the use having MinOffset built into it.
F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
// Check that multiplying with the scaled register doesn't overflow.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
// Check that multiplying with the unfolded offset doesn't overflow.
if (F.UnfoldedOffset != 0) {
if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
continue;
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+ continue;
+ // If the offset will be truncated, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
continue;
}
// If we make it here and it's legal, add it.
(void)InsertFormula(LU, LUIdx, F);
next:;
}
}
/// GenerateScales - Generate stride factor reuse formulae by making use of
/// scaled-offset address modes, for example.
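// Illustrative note (not from the original source): on a target with a
// base + 4*index addressing mode, a base register {A,+,4} can be divided by
// the factor 4 and carried as a scaled register {A/4,+,1} with Scale = 4.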
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
Type *IntTy = Base.getType();
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
if (Base.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
Base.Scale = Factor;
Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
Base)) {
// As a special case, handle out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
continue;
}
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
(void)InsertFormula(LU, LUIdx, F);
}
}
}
}
/// GenerateTruncates - Generate reuse formulae from different IV types.
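// Illustrative note (not from the original source): if the loop already
// maintains an i64 IV and truncation to i32 is free on the target, an i32
// use can be served by any-extending the formula's registers to i64 rather
// than keeping a separate narrow IV.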
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Don't bother truncating symbolic values.
if (Base.BaseGV) return;
// Determine the integer type for the base formula.
Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
}
namespace {
/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
size_t LUIdx;
int64_t Imm;
const SCEV *OrigReg;
WorkItem(size_t LI, int64_t I, const SCEV *R)
: LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
<< " , add offset " << Imm;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
#endif
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
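// Illustrative note (not from the original source): if one use needs
// {A,+,4} and another needs {A+8,+,4}, the second can be expressed as the
// first use's register plus an immediate offset of 8, saving a register.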
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
RegMapTy Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
int64_t Imm = ExtractImmediate(Reg, SE);
std::pair<RegMapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
Pair.first->second.insert(std::make_pair(Imm, *I));
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
}
// Now examine each set of registers with the same base value. Build up
// a list of work to do and do the work in a separate step so that we're
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
E = Sequence.end(); I != E; ++I) {
const SCEV *Reg = *I;
const ImmMapTy &Imms = Map.find(Reg)->second;
// It's not worthwhile looking for reuse if there's only one offset.
if (Imms.size() == 1)
continue;
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J)
dbgs() << ' ' << J->first;
dbgs() << '\n');
// Examine each offset.
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J) {
const SCEV *OrigReg = J->second;
int64_t JImm = J->first;
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
if (!isa<SCEVConstant>(OrigReg) &&
UsedByIndicesMap[Reg].count() == 1) {
DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
continue;
}
// Conservatively examine offsets between this orig reg and a few
// selected other orig regs.
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), prior(Imms.end()),
Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
if (M == J || M == JE) continue;
// Compute the difference between the two.
int64_t Imm = (uint64_t)JImm - M->first;
for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
LUIdx = UsedByIndices.find_next(LUIdx))
// Make a memo of this use, offset, and register tuple.
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
}
}
}
Map.clear();
Sequence.clear();
UsedByIndicesMap.clear();
UniqueItems.clear();
// Now iterate through the worklist and add new formulae.
for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
E = WorkItems.end(); I != E; ++I) {
const WorkItem &WI = *I;
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
const Formula &F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
NewF.BaseOffset = Offset;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
// If the new scale is a constant in a register, and adding the constant
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
(NewF.BaseOffset < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
.ule(abs64(NewF.BaseOffset)))
continue;
// OK, looks good.
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
const SCEV *BaseReg = F.BaseRegs[N];
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
}
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
// If the new formula has a constant in a register, and adding the
// constant value to the immediate would produce a value closer to
// zero than the immediate itself, then the formula isn't worthwhile.
for (SmallVectorImpl<const SCEV *>::const_iterator
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
}
}
}
}
}
/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
// queries are more precise.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScales(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
}
GenerateCrossUseConstantOffsets();
DEBUG(dbgs() << "\n"
"After generating reuse formulae:\n";
print_uses(dbgs()));
}
/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
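// Illustrative note (not from the original source): two formulae that expose
// the same shared-register set {reg1} to other uses are interchangeable from
// the solver's perspective, so only the one RateFormula scores cheaper needs
// to be kept.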
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
// Some formulae are instant losers. For example, they may depend on
// nonexistent AddRecs from other loops. These need to be filtered
// immediately; otherwise heuristics could choose them over others, leading
// to an unsatisfactory solution. Passing LoserRegs into RateFormula here
// avoids the need to recompute this information across formulae using the
// same bad AddRec. Passing LoserRegs is also essential unless we remove
// the corresponding bad register from the Regs set.
Cost CostF;
Regs.clear();
CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
&LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
// by uses within other loops that have some non-trivial address mode or
// use the postinc form of the IV. LSR needs to provide these formulae
// as the basis of rediscovering the desired formula that uses an AddRec
// corresponding to the existing phi. Once all formulae have been
// generated, these initial losers may be pruned.
DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
dbgs() << "\n");
}
else {
SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
if (F.ScaledReg &&
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
Key.push_back(F.ScaledReg);
// Unstable sort by host pointer order is OK, because this is only used
// for uniquifying.
std::sort(Key.begin(), Key.end());
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
if (P.second)
continue;
Formula &Best = LU.Formulae[P.first->second];
Cost CostBest;
Regs.clear();
CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
}
#ifndef NDEBUG
ChangedFormulae = true;
#endif
LU.DeleteFormula(F);
--FIdx;
--NumForms;
Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
// Reset this to prepare for the next use.
BestFormulae.clear();
}
DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;
/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
/// solutions the solver might have to consider. It almost never considers
/// this many solutions because it prunes the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
if (FSize >= ComplexityLimit) {
Power = ComplexityLimit;
break;
}
Power *= FSize;
if (Power >= ComplexityLimit)
break;
}
return Power;
}
/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
/// of the registers of another formula, it won't help reduce register
/// pressure (though it may not necessarily hurt register pressure); remove
/// it to simplify the system.
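// Illustrative note (not from the original source): a formula with base
// registers {reg1, (const 7)} is a superset of one that folds the 7 into its
// immediate offset and keeps only {reg1}, so the former can be dropped.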
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other "
"formulae.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
// Look for a formula with a constant or GV in a register. If the use
// also has a formula with that same value in an immediate field,
// delete the one that uses a register.
for (SmallVectorImpl<const SCEV *>::const_iterator
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
NewF.BaseOffset += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
if (!F.BaseGV) {
Formula NewF = F;
NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
}
}
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
/// for expressions like A, A+1, A+2, etc., allocate a single register for
/// them.
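// Illustrative note (not from the original source): after 4x unrolling,
// uses at A, A+1, A+2, and A+3 can all be folded onto a single use's
// register, with the small constants moved into each fixup's offset.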
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
DEBUG(dbgs() << "The search space is too complex.\n"
"Narrowing the search space by assuming that uses separated "
"by a constant offset will use the same registers.\n");
// This is especially useful for unrolled loops.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.BaseOffset == 0 || F.Scale != 0)
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
if (!LUThatHas)
continue;
if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
LU.Kind, LU.AccessTy))
continue;
DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Update the relocs to reference the new use.
for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
LSRFixup &Fixup = *I;
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.BaseOffset;
// Add the new offset to LUThatHas' offset list.
if (LUThatHas->Offsets.back() != Fixup.Offset) {
LUThatHas->Offsets.push_back(Fixup.Offset);
if (Fixup.Offset > LUThatHas->MaxOffset)
LUThatHas->MaxOffset = Fixup.Offset;
if (Fixup.Offset < LUThatHas->MinOffset)
LUThatHas->MinOffset = Fixup.Offset;
}
DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
if (Fixup.LUIdx == NumUses-1)
Fixup.LUIdx = LUIdx;
}
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, F)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
--i;
--e;
Any = true;
}
}
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
// Delete the old use.
DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
}
}
DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
"undesirable dedicated registers.\n");
FilterOutUndesirableDedicatedRegisters();
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
/// to be profitable, and then in any use which has any reference to that
/// register, delete all formulae which do not reference that register.
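// Illustrative note (not from the original source): committing to the
// register shared by the most uses collapses each affected use's formula
// list to just the candidates containing it, which shrinks the search
// product quickly.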
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
// Ok, we have too many formulae on our hands to handle conveniently.
// Use a rough heuristic to thin out the list.
DEBUG(dbgs() << "The search space is too complex.\n");
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
const SCEV *Best = 0;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
if (Taken.count(Reg))
continue;
if (!Best)
Best = Reg;
else {
unsigned Count = RegUses.getUsedByIndices(Reg).count();
if (Count > BestNum) {
Best = Reg;
BestNum = Count;
}
}
}
DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
<< " will yield profitable reuse.\n");
Taken.insert(Best);
// In any use with formulae which reference this register, delete the
// formulae which don't reference it.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
if (!LU.Regs.count(Best)) continue;
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
if (!F.referencesReg(Best)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--e;
--i;
Any = true;
assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
continue;
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
/// formulae to choose from, use some rough heuristics to prune down the number
/// of formulae. This keeps the main solver from taking an extraordinary amount
/// of time in some worst-case scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
NarrowSearchSpaceByPickingWinnerRegs();
}
/// SolveRecurse - This is the recursive solver.
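// The recursion is a depth-first branch-and-bound: Workspace holds one
// chosen formula per use so far, CurCost/CurRegs carry the running cost and
// register set, and any branch that already costs more than SolutionCost is
// pruned without being completed.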
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const {
// Some ideas:
// - prune more:
// - use more aggressive filtering
// - sort the formula so that the most profitable solutions are found first
// - sort the uses too
// - search faster:
// - don't compute a cost, and then compare; compare while computing a cost
// and bail early.
// - track register sets with SmallBitVector
const LSRUse &LU = Uses[Workspace.size()];
// If this use references any register that's already a part of the
// in-progress solution, consider it a requirement that a formula must
// reference that register in order to be considered. This prunes out
// unprofitable searching.
SmallSetVector<const SCEV *, 4> ReqRegs;
for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
E = CurRegs.end(); I != E; ++I)
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Ignore formulae which do not use any of the required registers.
bool SatisfiedReqReg = true;
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((!F.ScaledReg || F.ScaledReg != Reg) &&
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
F.BaseRegs.end()) {
SatisfiedReqReg = false;
break;
}
}
if (!SatisfiedReqReg) {
// If none of the formulae satisfied the required registers, then we could
// clear ReqRegs and try again. Currently, we simply give up in this case.
continue;
}
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
NewRegs, VisitedRegs);
if (F.getNumRegs() == 1 && Workspace.size() == 1)
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
dbgs() << ".\n Regs:";
for (SmallPtrSet<const SCEV *, 16>::const_iterator
I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
dbgs() << ' ' << **I;
dbgs() << '\n');
SolutionCost = NewCost;
Solution = Workspace;
}
Workspace.pop_back();
}
}
}
/// Solve - Choose one formula from each use. Return the results in the given
/// Solution vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
SolutionCost.Loose();
Cost CurCost;
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
Workspace.reserve(Uses.size());
// SolveRecurse does all the work.
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
if (Solution.empty()) {
DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
return;
}
// Ok, we've now made all our decisions.
DEBUG(dbgs() << "\n"
"The chosen solution requires "; SolutionCost.print(dbgs());
dbgs() << ":\n";
for (size_t i = 0, e = Uses.size(); i != e; ++i) {
dbgs() << " ";
Uses[i].print(dbgs());
dbgs() << "\n"
" ";
Solution[i]->print(dbgs());
dbgs() << '\n';
});
assert(Solution.size() == Uses.size() && "Malformed solution!");
}
/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
/// the dominator tree as far as we can go while still being dominated by the
/// input positions. This helps canonicalize the insert position, which
/// encourages sharing.
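// Illustrative note (not from the original source): if every input of the
// expansion dominates the loop preheader, the insert position can climb all
// the way out of the loop, letting later expansions reuse the same value.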
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
for (;;) {
const Loop *IPLoop = LI.getLoopFor(IP->getParent());
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
BasicBlock *IDom;
for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
if (!Rung) return IP;
Rung = Rung->getIDom();
if (!Rung) return IP;
IDom = Rung->getBlock();
// Don't climb into a loop though.
const Loop *IDomLoop = LI.getLoopFor(IDom);
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
if (IDomDepth <= IPLoopDepth &&
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
break;
}
bool AllDominate = true;
Instruction *BetterPos = 0;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
Instruction *Inst = *I;
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
}
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = llvm::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
IP = BetterPos;
else
IP = Tentative;
}
return IP;
}
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
const LSRUse &LU,
SCEVExpander &Rewriter) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
SmallVector<Instruction *, 4> Inputs;
if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
Inputs.push_back(I);
if (LU.Kind == LSRUse::ICmpZero)
if (Instruction *I =
dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
Inputs.push_back(I);
if (LF.PostIncLoops.count(L)) {
if (LF.isUseFullyOutsideLoop(L))
Inputs.push_back(L->getLoopLatch()->getTerminator());
else
Inputs.push_back(IVIncInsertPos);
}
// The expansion must also be dominated by the increment positions of any
// loops for which it is using post-inc mode.
for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
E = LF.PostIncLoops.end(); I != E; ++I) {
const Loop *PIL = *I;
if (PIL == L) continue;
// Be dominated by the loop exit.
SmallVector<BasicBlock *, 4> ExitingBlocks;
PIL->getExitingBlocks(ExitingBlocks);
if (!ExitingBlocks.empty()) {
BasicBlock *BB = ExitingBlocks[0];
for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
Inputs.push_back(BB->getTerminator());
}
}
assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
&& !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction");
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
// Ignore landingpad instructions.
while (isa<LandingPadInst>(IP)) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
// Set IP below instructions recently inserted by SCEVExpander. This keeps the
// IP consistent across expansions and allows the previously inserted
// instructions to be reused by subsequent expansions.
while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
return IP;
}
/// Expand - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
if (LU.RigidFormula)
return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs.
Type *OpTy = LF.OperandValToReplace->getType();
// This will be the type that we'll initially expand to.
Type *Ty = F.getType();
if (!Ty)
// No type known; just expand directly to the ultimate type.
Ty = OpTy;
else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
// Expand directly to the ultimate type if it's the right size.
Ty = OpTy;
// This is the type to do integer arithmetic in.
Type *IntTy = SE.getEffectiveSCEVType(Ty);
// Build up a list of operands to add together to form the full base.
SmallVector<const SCEV *, 8> Ops;
// Expand the BaseRegs portion.
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *Reg = *I;
assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user, make the post-inc adjustment.
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
Reg = TransformForPostIncUse(Denormalize, Reg,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user, make the post-inc adjustment.
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
// Flush the operand list to suppress SCEVExpander hoisting address modes.
if (!Ops.empty() && LU.Kind == LSRUse::Address) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
// Expand the GV portion.
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
Ops.push_back(SE.getUnknown(F.BaseGV));
}
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
// Expand the immediate portion.
int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
if (!ICmpScaledV)
ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
}
} else {
// Just add the immediate values. These again are expected to be matched
// as part of the address.
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
}
}
// Expand the unfolded offset portion.
int64_t UnfoldedOffset = F.UnfoldedOffset;
if (UnfoldedOffset != 0) {
// Just add the immediate values.
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
UnfoldedOffset)));
}
// Emit instructions summing all the operands.
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
// An ICmpZero Formula represents an ICmp which we're handling as a
// comparison against zero. Now that we've expanded an expression for that
// form, update the ICmp's other operand.
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
OpTy, false),
ICmpScaledV, OpTy, "tmp", CI);
ICmpScaledV = Cast;
}
CI->setOperand(1, ICmpScaledV);
} else {
assert(F.Scale == 0 &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
-(uint64_t)Offset);
if (C->getType() != OpTy)
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
OpTy, false),
C, OpTy);
CI->setOperand(1, C);
}
}
return FullV;
}
/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
/// of their operands effectively happens in their predecessor blocks, so the
/// expression may need to be expanded in multiple places.
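// Illustrative note (not from the original source): a PHI in an exit block
// with two in-loop predecessors needs the expansion emitted before each
// predecessor's terminator, or once per new block after critical-edge
// splitting.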
void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
BasicBlock *BB = PN->getIncomingBlock(i);
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
BasicBlock *Parent = PN->getParent();
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
BasicBlock *NewBB = 0;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
/*MergeIdenticalEdges=*/true,
/*DontDeleteUselessPhis=*/true);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
// phi predecessors are identical. The simple thing to do is skip
// splitting in this case rather than complicate the API.
if (NewBB) {
// If PN is outside of the loop and BB is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after BB.
if (L->contains(BB) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
// Splitting the edge can reduce the number of PHI entries we have.
e = PN->getNumIncomingValues();
BB = NewBB;
i = PN->getBasicBlockIndex(BB);
}
}
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
OpTy, false),
FullV, LF.OperandValToReplace->getType(),
"tmp", BB->getTerminator());
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
}
}
/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
} else {
Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
FullV, OpTy, "tmp", LF.UserInst);
FullV = Cast;
}
// Update the user. ICmpZero is handled specially here (for now) because
// Expand may have updated one of the operands of the icmp already, and
// its new value may happen to be equal to LF.OperandValToReplace, in
// which case doing replaceUsesOfWith leads to replacing both operands
// with the same value. TODO: Reorganize this.
if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
LF.UserInst->setOperand(0, FullV);
else
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
}
DeadInsts.push_back(LF.OperandValToReplace);
}
/// ImplementSolution - Rewrite all the fixup locations with new values,
/// following the chosen solution.
void
LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
Rewriter.disableCanonicalMode();
Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Mark phi nodes that terminate chains so the expander tries to reuse them.
for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
Rewriter.setChainedPhi(PN);
}
// Expand the new value definitions and update the users.
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
const LSRFixup &Fixup = *I;
Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
GenerateIVChain(*ChainI, Rewriter, DeadInsts);
Changed = true;
}
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
// If there's no interesting work to be done, bail early.
if (IU.empty()) return;
// If there's too much analysis to be done, bail early. We won't be able to
// model the problem anyway.
unsigned NumUsers = 0;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
if (++NumUsers > MaxIVUsers) {
DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
<< "\n");
return;
}
}
#ifndef NDEBUG
// All dominating loops must have preheaders, or SCEVExpander may not be able
// to materialize an AddRecExpr whose Start is an outer AddRecExpr.
//
// IVUsers analysis should only create users that are dominated by simple loop
// headers. Since this loop should dominate all of its users, its user list
// should be empty if this loop itself is not within a simple loop nest.
for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
Rung; Rung = Rung->getIDom()) {
BasicBlock *BB = Rung->getBlock();
const Loop *DomLoop = LI.getLoopFor(BB);
if (DomLoop && DomLoop->getHeader() == BB) {
assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
}
}
#endif // NDEBUG
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
// First, perform some low-level loop optimizations.
OptimizeShadowIV();
OptimizeLoopTermCond();
// If loop preparation eliminates all interesting IV users, bail.
if (IU.empty()) return;
// Skip nested loops until we can model them better with formulae.
if (!L->empty()) {
DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
// Start collecting data and preparing for the solver.
CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
assert(!Uses.empty() && "IVUsers reported at least one use");
DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
// Now use the reuse data to generate a bunch of interesting ways
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
SmallVector<const Formula *, 8> Solution;
Solve(Solution);
// Release memory that is no longer needed.
Factors.clear();
Types.clear();
RegUses.clear();
if (Solution.empty())
return;
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
I != E; ++I) {
const LSRUse &LU = *I;
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end();
J != JE; ++J)
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
*J) && "Illegal formula generated!");
}
#endif
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
if (Factors.empty() && Types.empty()) return;
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '*' << *I;
}
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '(' << **I << ')';
}
OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
dbgs() << " ";
I->print(OS);
OS << '\n';
}
}
void LSRInstance::print_uses(raw_ostream &OS) const {
OS << "LSR is examining the following uses:\n";
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
dbgs() << " ";
LU.print(OS);
OS << '\n';
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J) {
OS << " ";
J->print(OS);
OS << '\n';
}
}
}
void LSRInstance::print(raw_ostream &OS) const {
print_factors_and_types(OS);
print_fixups(OS);
print_uses(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
class LoopStrengthReduce : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
void getAnalysisUsage(AnalysisUsage &AU) const;
};
}
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
Pass *llvm::createLoopStrengthReducePass() {
return new LoopStrengthReduce();
}
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
Changed |= LSRInstance(L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
DeadInsts,
&getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
DeleteDeadPHIs(L->getHeader());
}
}
return Changed;
}
Index: head/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp (revision 265924)
+++ head/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp (revision 265925)
@@ -1,281 +1,283 @@
//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms loops by placing phi nodes at the end of the loops for
// all values that are live across the loop boundary. For example, it turns
// the left into the right code:
//
// for (...) for (...)
// if (c) if (c)
// X1 = ... X1 = ...
// else else
// X2 = ... X2 = ...
// X3 = phi(X1, X2) X3 = phi(X1, X2)
// ... = X3 + 4 X4 = phi(X3)
// ... = X4 + 4
//
// This is still valid LLVM; the extra phi nodes are purely redundant, and will
// be trivially eliminated by InstCombine. The major benefit of this
// transformation is that it makes many other loop optimizations, such as
// LoopUnswitching, simpler.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lcssa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumLCSSA, "Number of variables live out of a loop");
namespace {
struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LCSSA() : LoopPass(ID) {
initializeLCSSAPass(*PassRegistry::getPassRegistry());
}
// Cached analysis information for the current function.
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
PredIteratorCache PredCache;
Loop *L;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG. It maintains both of these,
/// as well as the CFG. It also requires dominator information.
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
}
private:
bool ProcessInstruction(Instruction *Inst,
const SmallVectorImpl<BasicBlock*> &ExitBlocks);
/// verifyAnalysis() - Verify loop nest.
virtual void verifyAnalysis() const {
// Check the special guarantees that LCSSA makes.
assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
}
};
}
char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
char &llvm::LCSSAID = LCSSA::ID;
/// BlockDominatesAnExit - Return true if the specified block dominates at least
/// one of the blocks in the specified list.
static bool BlockDominatesAnExit(BasicBlock *BB,
const SmallVectorImpl<BasicBlock*> &ExitBlocks,
DominatorTree *DT) {
DomTreeNode *DomNode = DT->getNode(BB);
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
return true;
return false;
}
/// runOnFunction - Process all loops in the function, inner-most out.
bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
L = TheLoop;
DT = &getAnalysis<DominatorTree>();
LI = &getAnalysis<LoopInfo>();
SE = getAnalysisIfAvailable<ScalarEvolution>();
// Get the set of exiting blocks.
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
if (ExitBlocks.empty())
return false;
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
bool MadeChange = false;
for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
BBI != E; ++BBI) {
BasicBlock *BB = *BBI;
// For large loops, avoid use-scanning by using dominance information: In
// particular, if a block does not dominate any of the loop exits, then none
// of the values defined in the block could be used outside the loop.
if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
continue;
for (BasicBlock::iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
if (I->use_empty() ||
(I->hasOneUse() && I->use_back()->getParent() == BB &&
!isa<PHINode>(I->use_back())))
continue;
MadeChange |= ProcessInstruction(I, ExitBlocks);
}
}
// If we modified the code, remove any caches about the loop from SCEV to
// avoid dangling entries.
// FIXME: This is a big hammer, can we clear the cache more selectively?
if (SE && MadeChange)
SE->forgetLoop(L);
assert(L->isLCSSAForm(*DT));
PredCache.clear();
return MadeChange;
}
/// isExitBlock - Return true if the specified block is in the list.
static bool isExitBlock(BasicBlock *BB,
const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
if (ExitBlocks[i] == BB)
return true;
return false;
}
/// ProcessInstruction - Given an instruction in the loop, check to see if it
/// has any uses that are outside the current loop. If so, insert LCSSA PHI
/// nodes and rewrite the uses.
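// Illustrative note (not from the original source): for a value %v defined
// in the loop and used after it, this inserts "%v.lcssa = phi(%v)" in each
// exit block dominated by %v's block, then rewrites the outside uses to the
// new PHIs (via SSAUpdater when the use is not itself in an exit block).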
bool LCSSA::ProcessInstruction(Instruction *Inst,
const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
SmallVector<Use*, 16> UsesToRewrite;
BasicBlock *InstBB = Inst->getParent();
for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
UI != E; ++UI) {
User *U = *UI;
BasicBlock *UserBB = cast<Instruction>(U)->getParent();
if (PHINode *PN = dyn_cast<PHINode>(U))
UserBB = PN->getIncomingBlock(UI);
if (InstBB != UserBB && !L->contains(UserBB))
UsesToRewrite.push_back(&UI.getUse());
}
// If there are no uses outside the loop, exit with no change.
if (UsesToRewrite.empty()) return false;
++NumLCSSA; // We are applying the transformation
// Invoke instructions are special in that their result value is not available
// along their unwind edge. The code below tests to see whether DomBB dominates
// the value, so adjust DomBB to the normal destination block, which is
// effectively where the value is first usable.
BasicBlock *DomBB = Inst->getParent();
if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
DomBB = Inv->getNormalDest();
DomTreeNode *DomNode = DT->getNode(DomBB);
SmallVector<PHINode*, 16> AddedPHIs;
SSAUpdater SSAUpdate;
SSAUpdate.Initialize(Inst->getType(), Inst->getName());
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
BasicBlock *ExitBB = *BBI;
if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
// If we already inserted something for this BB, don't reprocess it.
if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
PHINode *PN = PHINode::Create(Inst->getType(),
PredCache.GetNumPreds(ExitBB),
Inst->getName()+".lcssa",
ExitBB->begin());
// Add inputs from inside the loop for this PHI.
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
PN->addIncoming(Inst, *PI);
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
if (!L->contains(*PI))
UsesToRewrite.push_back(
&PN->getOperandUse(
PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
}
AddedPHIs.push_back(PN);
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
}
// Rewrite all uses outside the loop in terms of the new PHIs we just
// inserted.
for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
// If this use is in an exit block, rewrite to use the newly inserted PHI.
// This is required for correctness because SSAUpdate doesn't handle uses in
// the same block. It assumes the PHI we inserted is at the end of the
// block.
Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
BasicBlock *UserBB = User->getParent();
if (PHINode *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
if (isa<PHINode>(UserBB->begin()) &&
isExitBlock(UserBB, ExitBlocks)) {
// Tell the VHs that the uses changed. This updates SCEV's caches.
if (UsesToRewrite[i]->get()->hasValueHandle())
ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
UsesToRewrite[i]->set(UserBB->begin());
continue;
}
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}
// Remove PHI nodes that did not have any uses rewritten.
for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
if (AddedPHIs[i]->use_empty())
AddedPHIs[i]->eraseFromParent();
}
return true;
}
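// As an illustration (a sketch, not emitted verbatim by the pass): given a
// value defined in a loop and used in an exit block,
//
//   loop:
//     %v = add i32 %iv, 1
//     br i1 %cond, label %loop, label %exit
//   exit:
//     %use = add i32 %v, 2
//
// ProcessInstruction inserts a single-entry PHI into the exit block and
// rewrites the use through it, establishing LCSSA form:
//
//   exit:
//     %v.lcssa = phi i32 [ %v, %loop ]
//     %use = add i32 %v.lcssa, 2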
Index: head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (revision 265924)
+++ head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (revision 265925)
@@ -1,5157 +1,5166 @@
//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
// and generates target-independent LLVM-IR.
// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
// of instructions in order to estimate the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
// by the SIMD vector width, and not by one.
//
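// As a conceptual sketch, with a vectorization factor of 4 a loop such as
//
//   for (i = 0; i < n; ++i) a[i] = b[i] + k;
//
// is rewritten so that each new iteration loads b[i..i+3], performs one wide
// add and one wide store, and increments 'i' by 4; a scalar epilogue loop
// handles any remaining iterations.
//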
// This pass has four parts:
// 1. The main loop pass that drives the different parts.
// 2. LoopVectorizationLegality - A unit that checks for the legality
// of the vectorization.
// 3. InnerLoopVectorizer - A unit that performs the actual
// widening of instructions.
// 4. LoopVectorizationCostModel - A unit that checks for the profitability
// of vectorization. It decides on the optimal vector width, which
// can be one, if vectorization is not profitable.
//
//===----------------------------------------------------------------------===//
//
// The reduction-variable vectorization is based on the paper:
// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
//
// Variable uniformity checks are inspired by:
// Karrenberg, R. and Hack, S. Whole Function Vectorization.
//
// Other ideas/concepts are from:
// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
//
// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of
// Vectorizing Compilers.
//
//===----------------------------------------------------------------------===//
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <map>
using namespace llvm;
using namespace llvm::PatternMatch;
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
static cl::opt<unsigned>
VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden,
cl::desc("Sets the vectorization unroll count. "
"Zero is autoselect."));
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
/// We don't vectorize loops with a known constant trip count below this number.
static cl::opt<unsigned>
TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
cl::Hidden,
cl::desc("Don't vectorize loops with a constant "
"trip count that is smaller than this "
"value."));
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
/// When performing memory disambiguation checks at runtime do not make more
/// than this number of comparisons.
static const unsigned RuntimeMemoryCheckThreshold = 8;
/// Maximum SIMD width.
static const unsigned MaxVectorWidth = 64;
/// Maximum vectorization unroll count.
static const unsigned MaxUnrollFactor = 16;
/// The cost of a loop that is considered 'small' by the unroller.
static const unsigned SmallLoopCost = 20;
namespace {
// Forward declarations.
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
/// scalars. This class also implements the following features:
/// * It inserts an epilogue loop for handling loops that don't have iteration
/// counts that are known to be a multiple of the vectorization factor.
/// * It handles the code generation for reduction variables.
/// * Scalarization (implementation using scalars) of un-vectorizable
/// instructions.
/// InnerLoopVectorizer does not perform any vectorization-legality
/// checks, and relies on the caller to check for the different legality
/// aspects. The InnerLoopVectorizer relies on the
/// LoopVectorizationLegality class to provide information about the induction
/// and reduction variables that were found for a given vectorization factor.
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, DataLayout *DL,
const TargetLibraryInfo *TLI, unsigned VecWidth,
unsigned UnrollFactor)
: OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
OldInduction(0), WidenMap(UnrollFactor) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *Legal) {
// Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop(Legal);
// Widen each instruction in the old loop to a new one in the new loop.
// Use the Legality module to find the induction and reduction variables.
vectorizeLoop(Legal);
// Register the new loop and update the analysis passes.
updateAnalysis();
}
virtual ~InnerLoopVectorizer() {}
protected:
/// A small list of PHINodes.
typedef SmallVector<PHINode*, 4> PhiVector;
/// When we unroll loops we have multiple vector values for each scalar.
/// This data structure holds the unrolled and vectorized values that
/// originated from one scalar instruction.
typedef SmallVector<Value*, 2> VectorParts;
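/// For example, with UF = 2 a single scalar 'add' is represented by two wide
/// 'add' instructions, one per unrolled part, both stored in the same
/// VectorParts entry.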
// When we if-convert we need to create edge masks. We have to cache values so
// that we don't end up with exponential recursion/IR.
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
/// Add code that checks at runtime if the accessed arrays overlap.
/// Returns the comparator value or NULL if no check is needed.
Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
Instruction *Loc);
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
/// \brief The Loop exit block may have single value PHI nodes where the
/// incoming value is 'Undef'. While vectorizing we only handled real values
/// that were defined inside the loop. Here we fix the 'undef case'.
/// See PR14725.
void fixLCSSAPHIs();
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
/// mask for the block BB.
VectorParts createBlockInMask(BasicBlock *BB);
/// A helper function that computes the predicate of the edge between SRC
/// and DST.
VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
/// A helper function to vectorize a single BB within the innermost loop.
void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
PhiVector *PV);
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
LoopVectorizationLegality *Legal,
unsigned UF, unsigned VF, PhiVector *PV);
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
/// of scalars.
virtual void scalarizeInstruction(Instruction *Instr);
/// Vectorize Load and Store instructions.
virtual void vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal);
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
/// value. If this is the induction variable then we extend it to N, N+1, ...
/// this is needed because each iteration in the loop corresponds to a SIMD
/// element.
virtual Value *getBroadcastInstrs(Value *V);
/// This function adds StartIdx, StartIdx+1, StartIdx+2, ... to each vector
/// element. If Negate is set then negative offsets are used instead,
/// producing (StartIdx, StartIdx-1, StartIdx-2, ...).
virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
/// When we widen (vectorize) values we place them in the map. If the values
/// are not within the map, they have to be loop invariant, so we simply
/// broadcast them into a vector.
VectorParts &getVectorValue(Value *V);
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
/// a single scalar value is mapped to multiple vector parts. The parts
/// are stored in the VectorPart type.
struct ValueMap {
/// C'tor. UnrollFactor controls the number of vectors ('parts') that
/// are mapped.
ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {}
/// \return True if 'Key' is saved in the Value Map.
bool has(Value *Key) const { return MapStorage.count(Key); }
/// Initializes a new entry in the map. Sets all of the vector parts to the
/// same value in 'Val'.
/// \return A reference to a vector with splat values.
VectorParts &splat(Value *Key, Value *Val) {
VectorParts &Entry = MapStorage[Key];
Entry.assign(UF, Val);
return Entry;
}
///\return A reference to the value that is stored at 'Key'.
VectorParts &get(Value *Key) {
VectorParts &Entry = MapStorage[Key];
if (Entry.empty())
Entry.resize(UF);
assert(Entry.size() == UF);
return Entry;
}
private:
/// The unroll factor. Each entry in the map stores this number of vector
/// elements.
unsigned UF;
/// Map storage. We use std::map and not DenseMap because insertions to a
/// dense map invalidate its iterators.
std::map<Value *, VectorParts> MapStorage;
};
/// The original loop.
Loop *OrigLoop;
/// Scev analysis to use.
ScalarEvolution *SE;
/// Loop Info.
LoopInfo *LI;
/// Dominator Tree.
DominatorTree *DT;
/// Data Layout.
DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
protected:
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
/// The builder that we use
IRBuilder<> Builder;
// --- Vectorization state ---
/// The vector-loop preheader.
BasicBlock *LoopVectorPreHeader;
/// The scalar-loop preheader.
BasicBlock *LoopScalarPreHeader;
/// Middle Block between the vector and the scalar.
BasicBlock *LoopMiddleBlock;
/// The ExitBlock of the scalar loop.
BasicBlock *LoopExitBlock;
/// The vector loop body.
BasicBlock *LoopVectorBody;
/// The scalar loop body.
BasicBlock *LoopScalarBody;
/// A list of all bypass blocks. The first block is the entry of the loop.
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
/// The new Induction variable which was added to the new block.
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
/// Holds the extended (to the widest induction type) start index.
Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
EdgeMaskCache MaskCache;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, DataLayout *DL,
const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
private:
virtual void scalarizeInstruction(Instruction *Instr);
virtual void vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal);
virtual Value *getBroadcastInstrs(Value *V);
virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
virtual Value *reverseVector(Value *Vec);
};
/// \brief Look for a meaningful debug location on the instruction or its
/// operands.
static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
if (!I)
return I;
DebugLoc Empty;
if (I->getDebugLoc() != Empty)
return I;
for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) {
if (Instruction *OpInst = dyn_cast<Instruction>(*OI))
if (OpInst->getDebugLoc() != Empty)
return OpInst;
}
return I;
}
/// \brief Set the debug location in the builder using the debug location in the
/// instruction.
static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr))
B.SetCurrentDebugLocation(Inst->getDebugLoc());
else
B.SetCurrentDebugLocation(DebugLoc());
}
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
/// legality. This class has two main kinds of checks:
/// * Memory checks - The code in canVectorizeMemory checks if vectorization
/// will change the order of memory accesses in a way that will change the
/// correctness of the program.
/// * Scalar checks - The code in canVectorizeInstrs and canVectorizeMemory
/// checks for a number of different conditions, such as the availability of a
/// single induction variable, that all types are supported and vectorize-able,
/// etc. This code reflects the capabilities of InnerLoopVectorizer.
/// This class is also used by InnerLoopVectorizer for identifying
/// the induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TLI(TLI),
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
MaxSafeDepDistBytes(-1U) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
RK_NoReduction, ///< Not a reduction.
RK_IntegerAdd, ///< Sum of integers.
RK_IntegerMult, ///< Product of integers.
RK_IntegerOr, ///< Bitwise or logical OR of numbers.
RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
RK_FloatAdd, ///< Sum of floats.
RK_FloatMult, ///< Product of floats.
RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
};
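// For example, 's += a[i]' in a loop is an RK_IntegerAdd (or RK_FloatAdd)
// reduction, while 'm = a[i] < m ? a[i] : m' is an RK_IntegerMinMax
// reduction expressed as select(cmp()).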
/// This enum represents the kinds of inductions that we support.
enum InductionKind {
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = 1.
IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
};
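// For example, 'for (int i = 0; i < n; ++i)' gives an IK_IntInduction, and
// 'for (p = a; p != e; ++p)' gives an IK_PtrInduction whose step is the
// element size.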
// This enum represents the kind of minmax reduction.
enum MinMaxReductionKind {
MRK_Invalid,
MRK_UIntMin,
MRK_UIntMax,
MRK_SIntMin,
MRK_SIntMax,
MRK_FloatMin,
MRK_FloatMax
};
/// This struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
MinMaxReductionKind MK)
: StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
// The starting value of the reduction.
// It does not have to be zero!
TrackingVH<Value> StartValue;
// The instruction whose value is used outside the loop.
Instruction *LoopExitInstr;
// The kind of the reduction.
ReductionKind Kind;
// If this is a min/max reduction, the kind of reduction.
MinMaxReductionKind MinMaxKind;
};
/// This POD struct holds information about a potential reduction operation.
struct ReductionInstDesc {
ReductionInstDesc(bool IsRedux, Instruction *I) :
IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
// Is this instruction a reduction candidate.
bool IsReduction;
// The last instruction in a min/max pattern (select of the select(icmp())
// pattern), or the current reduction instruction otherwise.
Instruction *PatternLastInst;
// If this is a min/max pattern the comparison predicate.
MinMaxReductionKind MinMaxKind;
};
/// This struct holds information about the memory runtime legality
/// check that a group of pointers do not overlap.
struct RuntimePointerCheck {
RuntimePointerCheck() : Need(false) {}
/// Reset the state of the pointer runtime information.
void reset() {
Need = false;
Pointers.clear();
Starts.clear();
Ends.clear();
IsWritePtr.clear();
DependencySetId.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
unsigned DepSetId);
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
SmallVector<TrackingVH<Value>, 2> Pointers;
/// Holds the pointer value at the beginning of the loop.
SmallVector<const SCEV*, 2> Starts;
/// Holds the pointer value at the end of the loop.
SmallVector<const SCEV*, 2> Ends;
/// Holds whether this pointer is used for writing to memory.
SmallVector<bool, 2> IsWritePtr;
/// Holds the id of the set of pointers that could be dependent because of a
/// shared underlying object.
SmallVector<unsigned, 2> DependencySetId;
};
/// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
};
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
/// InductionList saves induction variables and maps them to the
/// induction descriptor.
typedef MapVector<PHINode*, InductionInfo> InductionList;
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
bool canVectorize();
/// Returns the Induction variable.
PHINode *getInduction() { return Induction; }
/// Returns the reduction variables found in the loop.
ReductionList *getReductionVars() { return &Reductions; }
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
bool blockNeedsPredication(BasicBlock *BB);
/// Check if this pointer is consecutive when vectorizing. This happens
/// when the last index of the GEP is the induction variable, or when the
/// pointer itself is an induction variable.
/// This check allows us to vectorize A[idx] into a wide load/store.
/// Returns:
/// 0 - Stride is unknown or non-consecutive.
/// 1 - Address is consecutive.
/// -1 - Address is consecutive, and decreasing.
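/// For example, 'A[i]' yields 1, 'A[n - i]' yields -1, and a strided access
/// such as 'A[2 * i]' yields 0.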
int isConsecutivePtr(Value *Ptr);
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
/// Returns true if this instruction will remain scalar after vectorization.
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
/// This function returns the identity element (or neutral element) for
/// the operation K.
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
/// and we only need to check individual instructions.
bool canVectorizeInstrs();
/// When we vectorize loops we may change the order in which
/// we read and write from memory. This method checks if it is
/// legal to vectorize the code, considering only memory constraints.
/// Returns true if the loop is vectorizable.
bool canVectorizeMemory();
/// Return true if we can vectorize this loop using the IF-conversion
/// transformation.
bool canVectorizeWithIfConvert();
/// Collect the variables that need to stay uniform after vectorization.
void collectLoopUniforms();
/// Return true if all of the instructions in the block can be speculatively
/// executed. \p SafePtrs is a list of addresses that are known to be legal
/// and we know that we can read from them without segfault.
bool blockCanBePredicated(BasicBlock *BB, SmallPtrSet<Value *, 8>& SafePtrs);
/// Returns true if 'Phi' is a reduction variable of kind 'Kind'. If it is,
/// the reduction is added to the ReductionList.
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
/// Returns a struct describing if the instruction 'I' can be a reduction
/// variable of type 'Kind'. If the reduction is a min/max pattern of
/// select(icmp()) this function advances the instruction pointer 'I' from the
/// compare instruction to the select instruction and stores this pointer in
/// 'PatternLastInst' member of the returned struct.
ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
ReductionInstDesc &Desc);
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
ReductionInstDesc &Prev);
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
/// DataLayout analysis.
DataLayout *DL;
/// Dominators.
DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
// --- vectorization state --- //
/// Holds the integer induction variable. This is the counter of the
/// loop.
PHINode *Induction;
/// Holds the reduction variables.
ReductionList Reductions;
/// Holds all of the induction variables that we found in the loop.
/// Notice that inductions don't need to start at zero and that induction
/// variables can be pointers.
InductionList Inductions;
/// Holds the widest induction type encountered.
Type *WidestIndTy;
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value*, 4> AllowedExit;
/// This set holds the variables which are known to be uniform after
/// vectorization.
SmallPtrSet<Instruction*, 4> Uniforms;
/// We need to check that all of the pointers in this list are disjoint
/// at runtime.
RuntimePointerCheck PtrRtCheck;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
unsigned MaxSafeDepDistBytes;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
/// a number of reasons. In this class we mainly attempt to predict the
/// expected speedup/slowdowns due to the supported instruction set. We use the
/// TargetTransformInfo to query the different backends for the cost of
/// different operations.
class LoopVectorizationCostModel {
public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
DataLayout *DL, const TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
/// Information about vectorization costs
struct VectorizationFactor {
unsigned Width; // Vector width with best cost
unsigned Cost; // Cost of the loop with that width
};
/// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every power of two up to VF. If UserVF is not zero
/// then this vectorization factor will be selected if vectorization is
/// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize,
unsigned UserVF);
/// \return The size (in bits) of the widest type in the code that
/// needs to be vectorized. We ignore values that remain scalar such as
/// 64 bit loop indices.
unsigned getWidestType();
/// \return The most profitable unroll factor.
/// If UserUF is non-zero then this method finds the best unroll-factor
/// based on register pressure and other parameters.
/// VF and LoopCost are the selected vectorization factor and the cost of the
/// selected VF.
unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
unsigned LoopCost);
/// \brief A struct that represents some properties of the register usage
/// of a loop.
struct RegisterUsage {
/// Holds the number of loop invariant values that are used in the loop.
unsigned LoopInvariantRegs;
/// Holds the maximum number of concurrent live intervals in the loop.
unsigned MaxLocalUsers;
/// Holds the number of instructions in the loop.
unsigned NumInstructions;
};
/// \return information about the register usage of the loop.
RegisterUsage calculateRegisterUsage();
private:
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
/// the factor width.
unsigned expectedCost(unsigned VF);
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
static Type* ToVectorTy(Type *Scalar, unsigned VF);
/// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
/// Loop Info analysis.
LoopInfo *LI;
/// Vectorization legality.
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
/// Target data layout information.
DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
};
/// Utility class for getting and setting loop vectorizer hints in the form
/// of loop metadata.
struct LoopVectorizeHints {
/// Vectorization width.
unsigned Width;
/// Vectorization unroll factor.
unsigned Unroll;
LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
, Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except when
// width == 1, which is used to indicate the loop is already vectorized.
if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1)
Width = VectorizationFactor;
if (VectorizationUnroll.getNumOccurrences() > 0)
Unroll = VectorizationUnroll;
DEBUG(if (DisableUnrolling && Unroll == 1)
dbgs() << "LV: Unrolling disabled by the pass manager\n");
}
/// Return the loop vectorizer metadata prefix.
static StringRef Prefix() { return "llvm.vectorizer."; }
MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) {
SmallVector<Value*, 2> Vals;
Vals.push_back(MDString::get(Context, Name));
Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
return MDNode::get(Context, Vals);
}
/// Mark the loop L as already vectorized by setting the width to 1.
void setAlreadyVectorized(Loop *L) {
LLVMContext &Context = L->getHeader()->getContext();
Width = 1;
// Create a new loop id with one more operand for the already_vectorized
// hint. If the loop already has a loop id then copy the existing operands.
SmallVector<Value*, 4> Vals(1);
if (LoopID)
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
Vals.push_back(LoopID->getOperand(i));
Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width));
Vals.push_back(createHint(Context, Twine(Prefix(), "unroll").str(), 1));
MDNode *NewLoopID = MDNode::get(Context, Vals);
// Set operand 0 to refer to the loop id itself.
NewLoopID->replaceOperandWith(0, NewLoopID);
L->setLoopID(NewLoopID);
if (LoopID)
LoopID->replaceAllUsesWith(NewLoopID);
LoopID = NewLoopID;
}
private:
MDNode *LoopID;
/// Find hints specified in the loop metadata.
void getHints(const Loop *L) {
if (!LoopID)
return;
// First operand should refer to the loop id itself.
assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
const MDString *S = 0;
SmallVector<Value*, 4> Args;
// The expected hint is either an MDString or an MDNode with the first
// operand an MDString.
if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
if (!MD || MD->getNumOperands() == 0)
continue;
S = dyn_cast<MDString>(MD->getOperand(0));
for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
Args.push_back(MD->getOperand(i));
} else {
S = dyn_cast<MDString>(LoopID->getOperand(i));
assert(Args.size() == 0 && "too many arguments for MDString");
}
if (!S)
continue;
// Check if the hint starts with the vectorizer prefix.
StringRef Hint = S->getString();
if (!Hint.startswith(Prefix()))
continue;
// Remove the prefix.
Hint = Hint.substr(Prefix().size(), StringRef::npos);
if (Args.size() == 1)
getHint(Hint, Args[0]);
}
}
// Check string hint with one operand.
void getHint(StringRef Hint, Value *Arg) {
const ConstantInt *C = dyn_cast<ConstantInt>(Arg);
if (!C) return;
unsigned Val = C->getZExtValue();
if (Hint == "width") {
if (isPowerOf2_32(Val) && Val <= MaxVectorWidth)
Width = Val;
else
DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n");
} else if (Hint == "unroll") {
if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor)
Unroll = Val;
else
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
} else {
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
}
}
};
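// As an illustrative sketch of the metadata shape read above (using the
// metadata syntax of this LLVM version), a loop annotated with
//
//   !0 = metadata !{metadata !0, metadata !1}
//   !1 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
//
// requests a vector width of 8, and setAlreadyVectorized() rewrites the
// width hint to 1 so that a later run of the pass leaves the loop alone.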
/// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
static char ID;
explicit LoopVectorize(bool NoUnrolling = false)
: LoopPass(ID), DisableUnrolling(NoUnrolling) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
ScalarEvolution *SE;
DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
TargetLibraryInfo *TLI;
bool DisableUnrolling;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
if (!L->empty())
return false;
SE = &getAnalysis<ScalarEvolution>();
DL = getAnalysisIfAvailable<DataLayout>();
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
// If the target claims to have no vector registers, don't attempt
// vectorization.
if (!TTI->getNumberOfRegisters(true))
return false;
if (DL == NULL) {
DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
return false;
}
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
LoopVectorizeHints Hints(L, DisableUnrolling);
if (Hints.Width == 1 && Hints.Unroll == 1) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
// Use the cost model.
LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
// Check the function attributes to find out if this function should be
// optimized for size.
Function *F = L->getHeader()->getParent();
Attribute::AttrKind SzAttr = Attribute::OptimizeForSize;
Attribute::AttrKind FlAttr = Attribute::NoImplicitFloat;
unsigned FnIndex = AttributeSet::FunctionIndex;
bool OptForSize = F->getAttributes().hasAttribute(FnIndex, SzAttr);
bool NoFloat = F->getAttributes().hasAttribute(FnIndex, FlAttr);
if (NoFloat) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
return false;
}
// Select the optimal vectorization factor.
LoopVectorizationCostModel::VectorizationFactor VF;
VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
// Select the unroll factor.
unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
VF.Cost);
DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
F->getParent()->getModuleIdentifier() << '\n');
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
if (UF == 1)
return false;
// We decided not to vectorize, but we may want to unroll.
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
}
// Mark the loop as already vectorized to avoid vectorizing again.
Hints.setAlreadyVectorized(L);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
return true;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
LoopPass::getAnalysisUsage(AU);
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
// LoopVectorizationCostModel.
//===----------------------------------------------------------------------===//
void
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Loop *Lp, Value *Ptr,
bool WritePtr,
unsigned DepSetId) {
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
Pointers.push_back(Ptr);
Starts.push_back(AR->getStart());
Ends.push_back(ScEnd);
IsWritePtr.push_back(WritePtr);
DependencySetId.push_back(DepSetId);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
// Place the code for broadcasting invariant variables in the new preheader.
IRBuilder<>::InsertPointGuard Guard(Builder);
if (Invariant)
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
// Broadcast the scalar into all locations in the vector.
Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
return Shuf;
}
Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
"Elem must be an integer");
// Create the types.
Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType());
int VLen = Ty->getNumElements();
SmallVector<Constant*, 8> Indices;
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i) {
int64_t Idx = Negate ? (-i) : i;
Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
}
// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
return Builder.CreateAdd(Val, Cv, "induction");
}
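// For example, with VF = 4, StartIdx = 0 and Negate = false, a splat input
// <n, n, n, n> becomes <n, n+1, n+2, n+3>; with Negate = true it becomes
// <n, n-1, n-2, n-3>.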
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
static unsigned getGEPInductionOperand(DataLayout *DL,
const GetElementPtrInst *Gep) {
unsigned LastOperand = Gep->getNumOperands() - 1;
unsigned GEPAllocSize = DL->getTypeAllocSize(
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
std::advance(GEPTI, LastOperand - 1);
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
break;
--LastOperand;
}
return LastOperand;
}
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
// Make sure that the pointer does not point to structs.
if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
// If this value is a pointer induction variable we know it is consecutive.
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) {
InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK)
return 1;
else if (IK_ReversePtrInduction == II.IK)
return -1;
}
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
if (!Gep)
return 0;
unsigned NumOperands = Gep->getNumOperands();
Value *GpPtr = Gep->getPointerOperand();
// If this GEP value is a consecutive pointer induction variable and all of
// the indices are loop invariant then we know it is consecutive.
Phi = dyn_cast<PHINode>(GpPtr);
if (Phi && Inductions.count(Phi)) {
// Make sure that the pointer does not point to structs.
PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
if (GepPtrType->getElementType()->isAggregateType())
return 0;
// Make sure that all of the index operands are loop invariant.
for (unsigned i = 1; i < NumOperands; ++i)
if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK)
return 1;
else if (IK_ReversePtrInduction == II.IK)
return -1;
}
unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
// Check that all of the gep indices are uniform except for our induction
// operand.
for (unsigned i = 0; i != NumOperands; ++i)
if (i != InductionOperand &&
!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand));
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
const SCEV *Step = AR->getStepRecurrence(*SE);
// The memory is consecutive because the last index is consecutive
// and all other indices are loop invariant.
if (Step->isOne())
return 1;
if (Step->isAllOnesValue())
return -1;
}
return 0;
}
bool LoopVectorizationLegality::isUniform(Value *V) {
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
InnerLoopVectorizer::VectorParts&
InnerLoopVectorizer::getVectorValue(Value *V) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
// If we have this scalar in the map, return it.
if (WidenMap.has(V))
return WidenMap.get(V);
// If this scalar is unknown, assume that it is a constant or that it is
// loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
return WidenMap.splat(V, B);
}
Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
assert(Vec->getType()->isVectorTy() && "Invalid type");
SmallVector<Constant*, 8> ShuffleMask;
for (unsigned i = 0; i < VF; ++i)
ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
ConstantVector::get(ShuffleMask),
"reverse");
}
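// For example, with VF = 4 the shuffle mask is <3, 2, 1, 0>, turning
// <a, b, c, d> into <d, c, b, a>.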
void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
assert((LI || SI) && "Invalid Load/Store instruction");
Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target ABI alignment. We need to use the scalar's
// target ABI alignment in such a case.
if (!Alignment)
Alignment = DL->getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
if (ScalarAllocatedSize != VectorElementSize)
return scalarizeInstruction(Instr);
// If the pointer is loop invariant or if it is non-consecutive,
// scalarize the load.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
bool UniformLoad = LI && Legal->isUniform(Ptr);
if (!ConsecutiveStride || UniformLoad)
return scalarizeInstruction(Instr);
Constant *Zero = Builder.getInt32(0);
VectorParts &Entry = WidenMap.get(Instr);
// Handle consecutive loads/stores.
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
setDebugLocFromInst(Builder, Gep);
Value *PtrOperand = Gep->getPointerOperand();
Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
Gep2->setOperand(0, FirstBasePtr);
Gep2->setName("gep.indvar.base");
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
setDebugLocFromInst(Builder, Gep);
assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
OrigLoop) && "Base ptr must be invariant");
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example, A[I+1].
unsigned NumOperands = Gep->getNumOperands();
unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
for (unsigned i = 0; i < NumOperands; ++i) {
Value *GepOperand = Gep->getOperand(i);
Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
// Update last index or loop invariant instruction anchored in loop.
if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
assert((i == InductionOperand ||
SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
"Must be last index or loop invariant");
VectorParts &GEPParts = getVectorValue(GepOperand);
Value *Index = GEPParts[0];
Index = Builder.CreateExtractElement(Index, Zero);
Gep2->setOperand(i, Index);
Gep2->setName("gep.indvar.idx");
}
}
Ptr = Builder.Insert(Gep2);
} else {
// Use the induction element ptr.
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
setDebugLocFromInst(Builder, Ptr);
VectorParts &PtrVal = getVectorValue(Ptr);
Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
}
// Handle Stores:
if (SI) {
assert(!Legal->isUniform(SI->getPointerOperand()) &&
"We do not allow storing to uniform addresses");
setDebugLocFromInst(Builder, SI);
// We don't want to update the value in the map as it might be used in
// another expression. So don't use a reference type for "StoredVal".
VectorParts StoredVal = getVectorValue(SI->getValueOperand());
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If we store to reverse consecutive memory locations then we need
// to reverse the order of elements in the stored value.
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
Value *VecPtr = Builder.CreateBitCast(PartPtr,
DataTy->getPointerTo(AddressSpace));
Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
}
return;
}
// Handle loads.
assert(LI && "Must have a load instruction");
setDebugLocFromInst(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If the address is consecutive but reversed, then the
// wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
Value *VecPtr = Builder.CreateBitCast(PartPtr,
DataTy->getPointerTo(AddressSpace));
Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
cast<LoadInst>(LI)->setAlignment(Alignment);
Entry[Part] = Reverse ? reverseVector(LI) : LI;
}
}
void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
setDebugLocFromInst(Builder, Instr);
// Find all of the vectorized parameters.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *SrcOp = Instr->getOperand(op);
// If we are accessing the old induction variable, use the new one.
if (SrcOp == OldInduction) {
Params.push_back(getVectorValue(SrcOp));
continue;
}
// Try using previously calculated values.
Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
// If the src is an instruction that appeared earlier in the basic block
// then it should already be vectorized.
if (SrcInst && OrigLoop->contains(SrcInst)) {
assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
// The parameter is a vector value from earlier.
Params.push_back(WidenMap.get(SrcInst));
} else {
// The parameter is a scalar from outside the loop. Maybe even a constant.
VectorParts Scalars;
Scalars.append(UF, SrcOp);
Params.push_back(Scalars);
}
}
assert(Params.size() == Instr->getNumOperands() &&
"Invalid number of operands");
// Does this instruction return a value?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
Value *UndefVec = IsVoidRetTy ? 0 :
UndefValue::get(VectorType::get(Instr->getType(), VF));
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
// For each scalar that we create:
for (unsigned Width = 0; Width < VF; ++Width) {
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
// Replace the operands of the cloned instruction with extracted scalars.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *Op = Params[op][Part];
// Param is a vector. Need to extract the right lane.
if (Op->getType()->isVectorTy())
Op = Builder.CreateExtractElement(Op, Builder.getInt32(Width));
Cloned->setOperand(op, Op);
}
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
// If the original scalar returns a value we need to place it in a vector
// so that future users will be able to use it.
if (!IsVoidRetTy)
VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
Builder.getInt32(Width));
}
}
}
Instruction *
InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Instruction *Loc) {
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
Legal->getRuntimePointerCheck();
if (!PtrRtCheck->Need)
return NULL;
unsigned NumPointers = PtrRtCheck->Pointers.size();
SmallVector<TrackingVH<Value>, 2> Starts;
SmallVector<TrackingVH<Value>, 2> Ends;
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, OrigLoop)) {
DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
*Ptr <<"\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
unsigned AS = Ptr->getType()->getPointerAddressSpace();
// Use this type for pointer arithmetic.
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
Starts.push_back(Start);
Ends.push_back(End);
}
}
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = 0;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
// No need to check if two readonly pointers intersect.
if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
continue;
// Only need to check pointers between two different dependency sets.
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
(AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
"Trying to bounds check pointers with different address spaces");
Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
if (MemoryRuntimeCheck)
IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
MemoryRuntimeCheck = IsConflict;
}
}
// We have to do this trickery because the IRBuilder might fold the check to a
// constant expression, in which case there is no Instruction anchored in
// the block.
Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
ConstantInt::getTrue(Ctx));
ChkBuilder.Insert(Check, "memcheck.conflict");
return Check;
}
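// The check emitted above for each pair of pointer ranges is the standard
// interval-overlap test, using unsigned comparisons:
//
//   conflict(i, j) = (Start_i <= End_j) && (Start_j <= End_i)
//
// and the per-pair results are OR'ed into the single 'memcheck.conflict'
// value that guards entry to the vector loop.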
void
InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/*
In this function we generate a new loop. The new loop will contain
the vectorized instructions while the old loop will continue to run the
scalar remainder.
[ ] <-- vector loop bypass (may consist of multiple blocks).
/ |
/ v
| [ ] <-- vector pre header.
| |
| v
| [ ] \
| [ ]_| <-- vector loop.
| |
\ v
>[ ] <--- middle-block.
/ |
/ v
| [ ] <--- new preheader.
| |
| v
| [ ] \
| [ ]_| <-- old scalar loop to handle remainder.
\ |
\ v
>[ ] <-- exit block.
...
*/
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
// Some loops have a single integer induction variable, while other loops
// don't. One example is C++ iterators, which often have multiple pointer
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
OldInduction = Legal->getInduction();
Type *IdxTy = Legal->getWidestInductionType();
// Find the loop boundaries.
const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
// The exit count might have the type of i64 while the phi is i32. This can
// happen if we have an induction variable that is sign extended before the
// compare. The only way we can get such a backedge-taken count is if the
// induction variable was signed and as such will not overflow; in such a
// case truncation is legal.
if (ExitCount->getType()->getPrimitiveSizeInBits() >
IdxTy->getPrimitiveSizeInBits())
ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(ExitCount->getType(), 1));
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
SCEVExpander Exp(*SE, "induction");
// Count holds the overall loop count (N).
Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
BypassBlock->getTerminator());
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
Builder.SetInsertPoint(BypassBlock->getTerminator());
Value *StartIdx = ExtendedIdx = OldInduction ?
Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
IdxTy):
ConstantInt::get(IdxTy, 0);
assert(BypassBlock && "Invalid loop structure");
LoopBypassBlocks.push_back(BypassBlock);
// Split the single block loop into the two loop structure described above.
BasicBlock *VectorPH =
BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
BasicBlock *VecBody =
VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
BasicBlock *MiddleBlock =
VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
BasicBlock *ScalarPH =
MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
// Create and register the new vector loop.
Loop* Lp = new Loop();
Loop *ParentLoop = OrigLoop->getParentLoop();
// Insert the new loop into the loop nest and register the new basic blocks
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
} else {
LI->addTopLevelLoop(Lp);
}
Lp->addBasicBlockToLoop(VecBody, LI->getBase());
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
Builder.SetInsertPoint(VecBody->getFirstNonPHI());
// Generate the induction variable.
setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
Induction = Builder.CreatePHI(IdxTy, 2, "index");
// The loop step is equal to the vectorization factor (num of SIMD elements)
// times the unroll factor (num of SIMD instructions).
Constant *Step = ConstantInt::get(IdxTy, VF * UF);
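// For example, with VF = 4 and UF = 2 the induction variable advances by
// 8 scalar iterations per iteration of the vector loop.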
// This is the IR builder that we use to add all of the logic for bypassing
// the new vector loop.
IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
setDebugLocFromInst(BypassBuilder,
getDebugLocFromInstOrOperands(OldInduction));
// We may need to extend the index in case there is a type mismatch.
// We know that the count starts at zero and does not overflow.
if (Count->getType() != IdxTy) {
// The exit count can be of pointer type. Convert it to the correct
// integer type.
if (ExitCount->getType()->isPointerTy())
Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
else
Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
}
// Add the start index to the loop count to get the new end index.
Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
// Now we need to generate the expression for N - (N % Step), where
// Step = VF * UF, which is the part that the vectorized body will execute.
Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
"end.idx.rnd.down");
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
"cmp.zero");
BasicBlock *LastBypassBlock = BypassBlock;
// Generate the code that checks at runtime whether the arrays overlap. We
// put the checks into a separate block to make the more common case of few
// elements faster.
Instruction *MemRuntimeCheck = addRuntimeCheck(Legal,
BypassBlock->getTerminator());
if (MemRuntimeCheck) {
// Create a new block containing the memory check.
BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck,
"vector.memcheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
Instruction *OldTerm = BypassBlock->getTerminator();
BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
OldTerm->eraseFromParent();
Cmp = MemRuntimeCheck;
LastBypassBlock = CheckBlock;
}
LastBypassBlock->getTerminator()->eraseFromParent();
BranchInst::Create(MiddleBlock, VectorPH, Cmp,
LastBypassBlock);
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
// PHIs that are left in the scalar version of the loop.
// The starting values of PHI nodes depend on the counter of the last
// iteration in the vectorized loop.
// If we come from a bypass edge then we need to start from the original
// start value.
// This variable saves the new starting index for the scalar loop.
PHINode *ResumeIndex = 0;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
// Set builder to point to last bypass block.
BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
LoopVectorizationLegality::InductionInfo II = I->second;
Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
MiddleBlock->getTerminator());
// We might have extended the type of the induction variable but we need a
// truncated version for the scalar loop.
PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
MiddleBlock->getTerminator()) : 0;
Value *EndValue = 0;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
// Handle the integer induction counter.
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
// We have the canonical induction variable.
if (OrigPhi == OldInduction) {
// Create a truncated version of the resume value for the scalar loop;
// we might have promoted the type to a larger width.
EndValue =
BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
TruncResumeVal->addIncoming(EndValue, VecBody);
// We know what the end value is.
EndValue = IdxEndRoundDown;
// We also know which PHI node holds it.
ResumeIndex = ResumeVal;
break;
}
// Not the canonical induction variable - add the vector loop count to the
// start value.
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
break;
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
// Convert the CountRoundDown variable to the PHI size.
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
// Handle reverse integer induction counter.
EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using
// the end index.
EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
"ptr.ind.end");
break;
}
case LoopVectorizationLegality::IK_ReversePtrInduction: {
// The value at the end of the loop for the reverse pointer is calculated
// by creating a GEP with a negative index starting from the start value.
Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
"rev.ind.end");
EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
"rev.ptr.ind.end");
break;
}
} // end of switch
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
if (OrigPhi == OldInduction)
ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
else
ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
}
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
// The old induction's phi node in the scalar body needs the truncated value.
if (OrigPhi == OldInduction)
OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
else
OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
}
// If we are generating a new induction variable then we also need to
// generate the code that calculates the exit value. This value is not
// simply the end of the counter because we may skip the vectorized body
// in case of a runtime check.
if (!OldInduction){
assert(!ResumeIndex && "Unexpected resume value found");
ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
MiddleBlock->getTerminator());
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
}
// Make sure that we found the index where the scalar loop needs to continue.
assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
"Invalid resume Index");
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N % (VF * UF)) == N, then we *don't* need to run the remainder.
Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
ResumeIndex, "cmp.n",
MiddleBlock->getTerminator());
BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
// Remove the old terminator.
MiddleBlock->getTerminator()->eraseFromParent();
// Create i+1 and fill the PHINode.
Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
Induction->addIncoming(StartIdx, VectorPH);
Induction->addIncoming(NextIdx, VecBody);
// Create the compare.
Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
// Now we have two terminators. Remove the old one from the block.
VecBody->getTerminator()->eraseFromParent();
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
// Save the state.
LoopVectorPreHeader = VectorPH;
LoopScalarPreHeader = ScalarPH;
LoopMiddleBlock = MiddleBlock;
LoopExitBlock = ExitBlock;
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
LoopVectorizeHints Hints(Lp, true);
Hints.setAlreadyVectorized(Lp);
}
/// This function returns the identity element (or neutral element) for
/// the operation K.
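/// For example, the identity for an integer add reduction is 0, and for an
/// integer multiply reduction it is 1.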
Constant*
LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
switch (K) {
case RK_IntegerXor:
case RK_IntegerAdd:
case RK_IntegerOr:
// Adding, XOR-ing, or OR-ing zero to a number does not change it.
return ConstantInt::get(Tp, 0);
case RK_IntegerMult:
// Multiplying a number by 1 does not change it.
return ConstantInt::get(Tp, 1);
case RK_IntegerAnd:
// AND-ing a number with an all-1 value does not change it.
return ConstantInt::get(Tp, -1, true);
case RK_FloatMult:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
case RK_FloatAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
default:
llvm_unreachable("Unknown reduction kind");
}
}
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 1 ||
!I.getArgOperand(0)->getType()->isFloatingPointTy() ||
I.getType() != I.getArgOperand(0)->getType() ||
!I.onlyReadsMemory())
return Intrinsic::not_intrinsic;
return ValidIntrinsicID;
}
static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 2 ||
!I.getArgOperand(0)->getType()->isFloatingPointTy() ||
!I.getArgOperand(1)->getType()->isFloatingPointTy() ||
I.getType() != I.getArgOperand(0)->getType() ||
I.getType() != I.getArgOperand(1)->getType() ||
!I.onlyReadsMemory())
return Intrinsic::not_intrinsic;
return ValidIntrinsicID;
}
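// Map a call instruction to the equivalent vectorizable intrinsic, if any.
// For example, a call to 'sinf' whose argument and return types are both
// 'float' and that only reads memory maps to Intrinsic::sin.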
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
switch (II->getIntrinsicID()) {
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return II->getIntrinsicID();
default:
return Intrinsic::not_intrinsic;
}
}
if (!TLI)
return Intrinsic::not_intrinsic;
LibFunc::Func Func;
Function *F = CI->getCalledFunction();
// We are going to make assumptions about the semantics of these functions,
// so check that the target knows the function is available in this
// environment and that it does not have local linkage.
if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
return Intrinsic::not_intrinsic;
// Otherwise check if we have a call to a function that can be turned into a
// vector intrinsic.
switch (Func) {
default:
break;
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
return checkUnaryFloatSignature(*CI, Intrinsic::sin);
case LibFunc::cos:
case LibFunc::cosf:
case LibFunc::cosl:
return checkUnaryFloatSignature(*CI, Intrinsic::cos);
case LibFunc::exp:
case LibFunc::expf:
case LibFunc::expl:
return checkUnaryFloatSignature(*CI, Intrinsic::exp);
case LibFunc::exp2:
case LibFunc::exp2f:
case LibFunc::exp2l:
return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
case LibFunc::log:
case LibFunc::logf:
case LibFunc::logl:
return checkUnaryFloatSignature(*CI, Intrinsic::log);
case LibFunc::log10:
case LibFunc::log10f:
case LibFunc::log10l:
return checkUnaryFloatSignature(*CI, Intrinsic::log10);
case LibFunc::log2:
case LibFunc::log2f:
case LibFunc::log2l:
return checkUnaryFloatSignature(*CI, Intrinsic::log2);
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
case LibFunc::copysign:
case LibFunc::copysignf:
case LibFunc::copysignl:
return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
return checkUnaryFloatSignature(*CI, Intrinsic::floor);
case LibFunc::ceil:
case LibFunc::ceilf:
case LibFunc::ceill:
return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
case LibFunc::rint:
case LibFunc::rintf:
case LibFunc::rintl:
return checkUnaryFloatSignature(*CI, Intrinsic::rint);
case LibFunc::nearbyint:
case LibFunc::nearbyintf:
case LibFunc::nearbyintl:
return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
case LibFunc::round:
case LibFunc::roundf:
case LibFunc::roundl:
return checkUnaryFloatSignature(*CI, Intrinsic::round);
case LibFunc::pow:
case LibFunc::powf:
case LibFunc::powl:
return checkBinaryFloatSignature(*CI, Intrinsic::pow);
}
return Intrinsic::not_intrinsic;
}
/// This function translates the reduction kind to an LLVM binary operator.
static unsigned
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
switch (Kind) {
case LoopVectorizationLegality::RK_IntegerAdd:
return Instruction::Add;
case LoopVectorizationLegality::RK_IntegerMult:
return Instruction::Mul;
case LoopVectorizationLegality::RK_IntegerOr:
return Instruction::Or;
case LoopVectorizationLegality::RK_IntegerAnd:
return Instruction::And;
case LoopVectorizationLegality::RK_IntegerXor:
return Instruction::Xor;
case LoopVectorizationLegality::RK_FloatMult:
return Instruction::FMul;
case LoopVectorizationLegality::RK_FloatAdd:
return Instruction::FAdd;
case LoopVectorizationLegality::RK_IntegerMinMax:
return Instruction::ICmp;
case LoopVectorizationLegality::RK_FloatMinMax:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown reduction operation");
}
}
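/// Generate one step of a min/max reduction: compare Left and Right with the
/// predicate implied by RK and select the winner. For example, MRK_SIntMax
/// produces 'select (icmp sgt L, R), L, R'.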
Value *createMinMaxOp(IRBuilder<> &Builder,
LoopVectorizationLegality::MinMaxReductionKind RK,
Value *Left,
Value *Right) {
CmpInst::Predicate P = CmpInst::ICMP_NE;
switch (RK) {
default:
llvm_unreachable("Unknown min/max reduction kind");
case LoopVectorizationLegality::MRK_UIntMin:
P = CmpInst::ICMP_ULT;
break;
case LoopVectorizationLegality::MRK_UIntMax:
P = CmpInst::ICMP_UGT;
break;
case LoopVectorizationLegality::MRK_SIntMin:
P = CmpInst::ICMP_SLT;
break;
case LoopVectorizationLegality::MRK_SIntMax:
P = CmpInst::ICMP_SGT;
break;
case LoopVectorizationLegality::MRK_FloatMin:
P = CmpInst::FCMP_OLT;
break;
case LoopVectorizationLegality::MRK_FloatMax:
P = CmpInst::FCMP_OGT;
break;
}
Value *Cmp;
if (RK == LoopVectorizationLegality::MRK_FloatMin ||
RK == LoopVectorizationLegality::MRK_FloatMax)
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
else
Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
}
namespace {
struct CSEDenseMapInfo {
static bool canHandle(Instruction *I) {
return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
}
static inline Instruction *getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
static inline Instruction *getTombstoneKey() {
return DenseMapInfo<Instruction *>::getTombstoneKey();
}
static unsigned getHashValue(Instruction *I) {
assert(canHandle(I) && "Unknown instruction!");
return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
I->value_op_end()));
}
static bool isEqual(Instruction *LHS, Instruction *RHS) {
if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
LHS == getTombstoneKey() || RHS == getTombstoneKey())
return LHS == RHS;
return LHS->isIdenticalTo(RHS);
}
};
}
/// \brief Perform CSE of induction variable instructions.
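/// For example, two identical extractelement or getelementptr instructions
/// produced while widening induction variables are folded into one.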
static void cse(BasicBlock *BB) {
// Perform simple CSE.
SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *In = I++;
if (!CSEDenseMapInfo::canHandle(In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
if (Instruction *V = CSEMap.lookup(In)) {
In->replaceAllUsesWith(V);
In->eraseFromParent();
continue;
}
CSEMap[In] = In;
}
}
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
//
// Notice: any optimization or new instruction that goes
// into the code below should also be implemented in
// the cost model.
//
//===------------------------------------------------===//
Constant *Zero = Builder.getInt32(0);
// In order to support reduction variables we need to be able to vectorize
// Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
// stages. First, we create a new vector PHI node with no incoming edges.
// We use this value when we vectorize all of the instructions that use the
// PHI. Next, after all of the instructions in the block are complete we
// add the new incoming edges to the PHI. At this point all of the
// instructions in the basic block are vectorized, so we can use them to
// construct the PHI.
PhiVector RdxPHIsToFix;
// Scan the loop in a topological order to ensure that defs are vectorized
// before users.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
// Vectorize all of the blocks in the original loop.
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
be = DFS.endRPO(); bb != be; ++bb)
vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
// At this point every instruction in the original loop is widened to
// a vector form. We are almost done. Now, we need to fix the PHI nodes
// that we vectorized. The PHI nodes are currently empty because we did
// not want to introduce cycles. Notice that the remaining PHI nodes
// that we need to fix are reduction variables.
// Create the 'reduced' values for each of the reduction vars.
// The reduced values are the vector values that we scalarize and combine
// after the loop is finished.
for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
it != e; ++it) {
PHINode *RdxPhi = *it;
assert(RdxPhi && "Unable to recover vectorized PHI");
// Find the reduction variable descriptor.
assert(Legal->getReductionVars()->count(RdxPhi) &&
"Unable to find the reduction variable");
LoopVectorizationLegality::ReductionDescriptor RdxDesc =
(*Legal->getReductionVars())[RdxPhi];
setDebugLocFromInst(Builder, RdxDesc.StartValue);
// We need to generate a reduction vector from the incoming scalar.
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
Type *VecTy = VectorExit[0]->getType();
// Find the reduction identity variable. Zero for addition, OR and XOR;
// one for multiplication; -1 for AND.
Value *Identity;
Value *VectorStart;
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
// MinMax reductions have the start value as their identity.
if (VF == 1) {
VectorStart = Identity = RdxDesc.StartValue;
} else {
VectorStart = Identity = Builder.CreateVectorSplat(VF,
RdxDesc.StartValue,
"minmax.ident");
}
} else {
// Handle other reduction kinds:
Constant *Iden =
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
VecTy->getScalarType());
if (VF == 1) {
Identity = Iden;
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
VectorStart = RdxDesc.StartValue;
} else {
Identity = ConstantVector::getSplat(VF, Iden);
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
VectorStart = Builder.CreateInsertElement(Identity,
RdxDesc.StartValue, Zero);
}
}
// Fix the vector-loop phi.
// We created the induction variable so we know that the
// preheader is the first entry.
BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
VectorParts &Val = getVectorValue(LoopVal);
for (unsigned part = 0; part < UF; ++part) {
// Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (part == 0) ? VectorStart : Identity;
cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
}
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
// instructions.
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
VectorParts RdxParts;
setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
for (unsigned part = 0; part < UF; ++part) {
// This PHINode contains the vectorized reduction variable, or
// the initial value vector, if we bypass the vector loop.
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
RdxParts.push_back(NewPhi);
}
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = getReductionBinOp(RdxDesc.Kind);
setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
RdxParts[part], ReducedPartRdx,
"bin.rdx");
else
ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
ReducedPartRdx, RdxParts[part]);
}
if (VF > 1) {
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
// round.
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = ReducedPartRdx;
SmallVector<Constant*, 32> ShuffleMask(VF, 0);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i/2; ++j)
ShuffleMask[j] = Builder.getInt32(i/2 + j);
// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
UndefValue::get(Builder.getInt32Ty()));
Value *Shuf =
Builder.CreateShuffleVector(TmpVec,
UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask),
"rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
else
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
// The result is in the first element of the vector.
ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
Builder.getInt32(0));
}
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
// We know that the loop is in LCSSA form. We need to update the
// PHI nodes in the exit blocks.
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
if (!LCSSAPhi) break;
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
// We found our reduction value exit-PHI. Update it with the
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
// Add an edge coming from the bypass.
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
}// end of the LCSSA phi scan.
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
int IncomingEdgeBlockIdx =
(RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
fixLCSSAPHIs();
// Remove redundant induction instructions.
cse(LoopVectorBody);
}
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
if (!LCSSAPhi) break;
if (LCSSAPhi->getNumIncomingValues() == 1)
LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
LoopMiddleBlock);
}
}
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
"Invalid edge");
// Look for cached value.
std::pair<BasicBlock*, BasicBlock*> Edge(Src, Dst);
EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
if (ECEntryIt != MaskCache.end())
return ECEntryIt->second;
VectorParts SrcMask = createBlockInMask(Src);
// The terminator has to be a branch inst!
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
assert(BI && "Unexpected terminator found");
if (BI->isConditional()) {
VectorParts EdgeMask = getVectorValue(BI->getCondition());
if (BI->getSuccessor(0) != Dst)
for (unsigned part = 0; part < UF; ++part)
EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
for (unsigned part = 0; part < UF; ++part)
EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
MaskCache[Edge] = EdgeMask;
return EdgeMask;
}
MaskCache[Edge] = SrcMask;
return SrcMask;
}
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
return getVectorValue(C);
}
// This is the block mask. We OR all the incoming edge masks, starting from
// an all-zero mask.
Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
VectorParts BlockMask = getVectorValue(Zero);
// For each pred:
for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {
VectorParts EM = createEdgeMask(*it, BB);
for (unsigned part = 0; part < UF; ++part)
BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
}
return BlockMask;
}
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
InnerLoopVectorizer::VectorParts &Entry,
LoopVectorizationLegality *Legal,
unsigned UF, unsigned VF, PhiVector *PV) {
PHINode* P = cast<PHINode>(PN);
// Handle reduction variables:
if (Legal->getReductionVars()->count(P)) {
for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
Type *VecTy = (VF == 1) ? PN->getType() :
VectorType::get(PN->getType(), VF);
Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
LoopVectorBody->getFirstInsertionPt());
}
PV->push_back(P);
return;
}
setDebugLocFromInst(Builder, P);
// Check for PHI nodes that are lowered to vector selects.
if (P->getParent() != OrigLoop->getHeader()) {
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
unsigned NumIncoming = P->getNumIncomingValues();
// Generate a sequence of selects of the form:
// SELECT(Mask3, In3,
// SELECT(Mask2, In2,
// ( ...)))
for (unsigned In = 0; In < NumIncoming; In++) {
VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
P->getParent());
VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
for (unsigned part = 0; part < UF; ++part) {
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
if (In == 0)
Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
In0[part]);
else
// Select between the current value and the previous incoming edge
// based on the incoming mask.
Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
Entry[part], "predphi");
}
}
return;
}
// This PHINode must be an induction variable.
// Make sure that we know about it.
assert(Legal->getInductionVars()->count(P) &&
"Not an induction variable");
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(P);
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
assert(P->getType() == II.StartValue->getType() && "Types must match");
Type *PhiTy = P->getType();
Value *Broadcasted;
if (P == OldInduction) {
// Handle the canonical induction variable. We might have had to
// extend the type.
Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
} else {
// Handle other induction variables that are now based on the
// canonical one.
Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
"normalized.idx");
NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
"offset.idx");
}
Broadcasted = getBroadcastInstrs(Broadcasted);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
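// For example, with VF = 4, part 0 becomes <idx, idx+1, idx+2, idx+3> and
// part 1 becomes <idx+4, idx+5, idx+6, idx+7>.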
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
return;
}
case LoopVectorizationLegality::IK_ReverseIntInduction:
case LoopVectorizationLegality::IK_PtrInduction:
case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
Value *StartIdx = ExtendedIdx;
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
"normalized.idx");
// Handle the reverse integer induction variable case.
if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
"resize.norm.idx");
Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
"reverse.idx");
// This is a new value so do not hoist it out.
Value *Broadcasted = getBroadcastInstrs(ReverseInd);
// After broadcasting the induction variable we need to make the
// vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
true);
return;
}
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
// Is this a reverse induction ptr or a consecutive induction ptr?
bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
II.IK);
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
int EltIndex = (part) * (Reverse ? -1 : 1);
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
Value *GlobalIdx;
if (Reverse)
GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
else
GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
Entry[part] = SclrGep;
continue;
}
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
Value *GlobalIdx;
if (!Reverse)
GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
else
GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
Builder.getInt32(i),
"insert.gep");
}
Entry[part] = VecVal;
}
return;
}
}
void
InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
VectorParts &Entry = WidenMap.get(it);
switch (it->getOpcode()) {
case Instruction::Br:
// Nothing to do for PHIs and BR, since we already took care of the
// loop control flow instructions.
continue;
case Instruction::PHI:{
// Vectorize PHINodes.
widenPHIInstruction(it, Entry, Legal, UF, VF, PV);
continue;
}// End of PHI.
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
// Just widen binops.
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
setDebugLocFromInst(Builder, BinOp);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
// Use this vector value for all users of the original instruction.
for (unsigned Part = 0; Part < UF; ++Part) {
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
// Update the NSW, NUW and Exact flags. Notice: V can be an Undef.
BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V);
if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
}
if (VecOp && isa<PossiblyExactOperator>(VecOp))
VecOp->setIsExact(BinOp->isExact());
Entry[Part] = V;
}
break;
}
case Instruction::Select: {
// Widen selects.
// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.
bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
OrigLoop);
setDebugLocFromInst(Builder, it);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
VectorParts &Cond = getVectorValue(it->getOperand(0));
VectorParts &Op0 = getVectorValue(it->getOperand(1));
VectorParts &Op1 = getVectorValue(it->getOperand(2));
Value *ScalarCond = (VF == 1) ? Cond[0] :
Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = Builder.CreateSelect(
InvariantCond ? ScalarCond : Cond[Part],
Op0[Part],
Op1[Part]);
}
break;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
bool FCmp = (it->getOpcode() == Instruction::FCmp);
CmpInst *Cmp = dyn_cast<CmpInst>(it);
setDebugLocFromInst(Builder, it);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
Value *C = 0;
if (FCmp)
C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
else
C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
Entry[Part] = C;
}
break;
}
case Instruction::Store:
case Instruction::Load:
vectorizeMemoryInstruction(it, Legal);
break;
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
CastInst *CI = dyn_cast<CastInst>(it);
setDebugLocFromInst(Builder, it);
// Optimize the special case where the source is the induction
// variable. Notice that we can only optimize the 'trunc' case
// because: a. FP conversions lose precision, b. sext/zext may wrap,
// c. other casts depend on pointer size.
if (CI->getOperand(0) == OldInduction &&
it->getOpcode() == Instruction::Trunc) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
break;
}
// Vectorize casts.
Type *DestTy = (VF == 1) ? CI->getType() :
VectorType::get(CI->getType(), VF);
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
break;
}
case Instruction::Call: {
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(it))
break;
setDebugLocFromInst(Builder, it);
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
switch (ID) {
case Intrinsic::lifetime_end:
case Intrinsic::lifetime_start:
scalarizeInstruction(it);
break;
default:
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
Type *Tys[] = {CI->getType()};
if (VF > 1)
Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
Function *F = Intrinsic::getDeclaration(M, ID, Tys);
Entry[Part] = Builder.CreateCall(F, Args);
}
break;
}
break;
}
default:
// All other instructions are unsupported. Scalarize them.
scalarizeInstruction(it);
break;
}// end of switch.
}// end of for_each instr.
}
void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
SE->forgetLoop(OrigLoop);
// Update the dominator tree information.
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
"Entry does not dominate exit.");
for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
DEBUG(DT->verifyAnalysis());
}
/// \brief Check whether it is safe to if-convert this phi node.
///
/// Phi nodes with constant expressions that can trap are not safe to
/// if-convert.
static bool canIfConvertPHINodes(BasicBlock *BB) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
PHINode *Phi = dyn_cast<PHINode>(I);
if (!Phi)
return true;
for (unsigned p = 0, e = Phi->getNumIncomingValues(); p != e; ++p)
if (Constant *C = dyn_cast<Constant>(Phi->getIncomingValue(p)))
if (C->canTrap())
return false;
}
return true;
}
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion)
return false;
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
// A list of pointers that we can safely read and write to.
SmallPtrSet<Value *, 8> SafePointers;
// Collect safe addresses.
for (Loop::block_iterator BI = TheLoop->block_begin(),
BE = TheLoop->block_end(); BI != BE; ++BI) {
BasicBlock *BB = *BI;
if (blockNeedsPredication(BB))
continue;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
SafePointers.insert(LI->getPointerOperand());
else if (StoreInst *SI = dyn_cast<StoreInst>(I))
SafePointers.insert(SI->getPointerOperand());
}
}
// Collect the blocks that need predication.
BasicBlock *Header = TheLoop->getHeader();
for (Loop::block_iterator BI = TheLoop->block_begin(),
BE = TheLoop->block_end(); BI != BE; ++BI) {
BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator()))
return false;
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
if (!blockCanBePredicated(BB, SafePointers))
return false;
} else if (BB != Header && !canIfConvertPHINodes(BB))
return false;
}
// We can if-convert this loop.
return true;
}
bool LoopVectorizationLegality::canVectorize() {
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader())
return false;
// We can only vectorize innermost loops.
if (TheLoop->getSubLoopsVector().size())
return false;
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1)
return false;
// We must have a single exiting block.
if (!TheLoop->getExitingBlock())
return false;
// We need to have a loop header.
DEBUG(dbgs() << "LV: Found a loop: " <<
TheLoop->getHeader()->getName() << '\n');
// Check if we can if-convert non-single-bb loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
// Do not loop-vectorize loops with a tiny trip count.
BasicBlock *Latch = TheLoop->getLoopLatch();
unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
if (TC > 0u && TC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
"This loop is not worth vectorizing.\n");
return false;
}
// Check if we can vectorize the instructions and CFG in this loop.
if (!canVectorizeInstrs()) {
DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
return false;
}
// Go over each instruction and look at memory deps.
if (!canVectorizeMemory()) {
DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
return false;
}
// Collect all of the variables that remain uniform after vectorization.
collectLoopUniforms();
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
(PtrRtCheck.Need ? " (with a runtime bound check)" : "")
<<"!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
// no restrictions.
return true;
}
static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
if (Ty->isPointerTy())
return DL.getIntPtrType(Ty);
// It is possible that chars or shorts overflow when we ask for the loop's
// trip count; work around this by widening the type.
if (Ty->getScalarSizeInBits() < 32)
return Type::getInt32Ty(Ty->getContext());
return Ty;
}
static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
Ty0 = convertPointerToIntegerType(DL, Ty0);
Ty1 = convertPointerToIntegerType(DL, Ty1);
if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
return Ty0;
return Ty1;
}
/// \brief Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
SmallPtrSet<Value *, 4> &Reductions) {
// Reduction instructions are allowed to have exit users. All other
// instructions must not have external users.
if (!Reductions.count(Inst))
// Check that all of the users of the instruction are inside the loop.
for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
I != E; ++I) {
Instruction *U = cast<Instruction>(*I);
// This user may be a reduction exit value.
if (!TheLoop->contains(U)) {
DEBUG(dbgs() << "LV: Found an outside user for : " << *U << '\n');
return true;
}
}
return false;
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
if (F.hasFnAttribute("no-nans-fp-math"))
HasFunNoNaNAttr = F.getAttributes().getAttribute(
AttributeSet::FunctionIndex,
"no-nans-fp-math").getValueAsString() == "true";
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
// Scan the instructions in the block and look for hazards.
for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
Type *PhiTy = Phi->getType();
// Check that this PHI type is allowed.
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
// If this PHINode is not in the header block, then we know that we
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
return false;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
// Check if this is an induction variable.
InductionKind IK = isInductionVariable(Phi);
if (IK_NoInduction != IK) {
// Get the widest type.
if (!WidestIndTy)
WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
else
WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
// than it is expedient).
if (!Induction || PhiTy == WidestIndTy)
Induction = Phi;
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
Inductions[Phi] = InductionInfo(StartValue, IK);
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
return false;
continue;
}
if (AddReductionVar(Phi, RK_IntegerAdd)) {
DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_IntegerMult)) {
DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_IntegerOr)) {
DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_IntegerAnd)) {
DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_IntegerXor)) {
DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_IntegerMinMax)) {
DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_FloatMult)) {
DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_FloatAdd)) {
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
continue;
}
if (AddReductionVar(Phi, RK_FloatMinMax)) {
DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<
"\n");
continue;
}
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
// We still don't handle arbitrary function calls. However, we can ignore
// dbg intrinsic calls, and we do handle certain intrinsics and libm
// functions.
CallInst *CI = dyn_cast<CallInst>(it);
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
// Check that the stored type is vectorizable.
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T))
return false;
}
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
return false;
} // next instr.
}
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty())
return false;
}
return true;
}
void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
// Collect variables that will remain uniform after vectorization.
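// For example, the compare that feeds the latch branch is computed once per
// vector iteration rather than once per lane, so it remains uniform.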
std::vector<Value*> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
// Start with the conditional branch and walk up the block.
Worklist.push_back(Latch->getTerminator()->getOperand(0));
while (Worklist.size()) {
Instruction *I = dyn_cast<Instruction>(Worklist.back());
Worklist.pop_back();
// Look at instructions inside this loop.
// Stop when reaching PHI nodes.
// TODO: we need to follow values all over the loop, not only in this block.
if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
continue;
// This is a known uniform.
Uniforms.insert(I);
// Insert all operands.
Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
}
}
namespace {
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
/// \brief Read or write access location.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
/// \brief Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
AccessAnalysis(DataLayout *Dl, DepCandidates &DA) :
DL(Dl), DepCands(DA), AreAllWritesIdentified(true),
AreAllReadsIdentified(true), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(Value *Ptr, bool IsReadOnly) {
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// \brief Register a store.
void addStore(Value *Ptr) {
Accesses.insert(MemAccessInfo(Ptr, true));
}
/// \brief Check whether we can check the pointers at runtime for
/// non-intersection.
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
Loop *TheLoop, bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
void buildDependenceSets() {
// Process read-write pointers first.
processMemAccesses(false);
// Next, process read pointers.
processMemAccesses(true);
}
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
void resetDepChecks() { CheckDeps.clear(); }
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
/// \brief Go over all memory accesses, or only the deferred ones if
/// \p UseDeferred is true, and check whether runtime pointer checks are
/// needed and build sets of dependency check candidates.
void processMemAccesses(bool UseDeferred);
/// Set of all accesses.
PtrAccessSet Accesses;
/// Set of accesses to check after all writes have been processed.
PtrAccessSet DeferredAccesses;
/// Map of pointers to last access encountered.
UnderlyingObjToAccessMap ObjToLastAccess;
/// Set of accesses that need a further dependence check.
MemAccessInfoSet CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
/// Set of underlying objects already written to.
SmallPtrSet<Value*, 16> WriteObjects;
DataLayout *DL;
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need
/// a dependence check.
DepCandidates &DepCands;
bool AreAllWritesIdentified;
bool AreAllReadsIdentified;
bool IsRTCheckNeeded;
};
} // end anonymous namespace
/// \brief Check whether a pointer can participate in a runtime bounds check.
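/// For example, a pointer whose SCEV is an affine AddRec such as
/// {%base,+,4} has computable bounds, while a pointer reloaded from memory
/// on every iteration (as in a[b[i]]) does not.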
static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
const SCEV *PtrScev = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR)
return false;
return AR->isAffine();
}
/// \brief Check the stride of the pointer and ensure that it does not wrap in
/// the address space.
static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
const Loop *Lp);
bool AccessAnalysis::canCheckPtrAtRT(
LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
Loop *TheLoop, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
unsigned NumReadPtrChecks = 0;
unsigned NumWritePtrChecks = 0;
bool CanDoRT = true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
// We assign a consecutive id to accesses from different dependence sets.
// Accesses within the same set don't need a runtime check.
unsigned RunningDepId = 1;
DenseMap<Value *, unsigned> DepSetId;
for (PtrAccessSet::iterator AI = Accesses.begin(), AE = Accesses.end();
AI != AE; ++AI) {
const MemAccessInfo &Access = *AI;
Value *Ptr = Access.getPointer();
bool IsWrite = Access.getInt();
// Just add write checks if we have both.
if (!IsWrite && Accesses.count(MemAccessInfo(Ptr, true)))
continue;
if (IsWrite)
++NumWritePtrChecks;
else
++NumReadPtrChecks;
if (hasComputableBounds(SE, Ptr) &&
// When we run after a failing dependency check we have to make sure we
// don't have wrapping pointers.
(!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
// The id of the dependence set.
unsigned DepId;
if (IsDepCheckNeeded) {
Value *Leader = DepCands.getLeaderValue(Access).getPointer();
unsigned &LeaderId = DepSetId[Leader];
if (!LeaderId)
LeaderId = RunningDepId++;
DepId = LeaderId;
} else
// Each access has its own dependence set.
DepId = RunningDepId++;
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
} else {
CanDoRT = false;
}
}
if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
NumComparisons = 0; // Only one dependence set.
else {
NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
NumWritePtrChecks - 1));
}
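// For example, 2 writes and 3 reads in distinct dependence sets require
// 2 * (3 + 2 - 1) = 8 comparisons.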
// If the pointers that we would use for the bounds comparison have different
// address spaces, assume the values aren't directly comparable, so we can't
// use them for the runtime check. We also have to assume they could
// overlap. In the future there should be metadata for whether address spaces
// are disjoint.
unsigned NumPointers = RtCheck.Pointers.size();
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i + 1; j < NumPointers; ++j) {
// Only need to check pointers between two different dependency sets.
if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
continue;
Value *PtrI = RtCheck.Pointers[i];
Value *PtrJ = RtCheck.Pointers[j];
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
if (ASi != ASj) {
DEBUG(dbgs() << "LV: Runtime check would require comparison between"
" different address spaces\n");
return false;
}
}
}
return CanDoRT;
}
static bool isFunctionScopeIdentifiedObject(Value *Ptr) {
return isNoAliasArgument(Ptr) || isNoAliasCall(Ptr) || isa<AllocaInst>(Ptr);
}
void AccessAnalysis::processMemAccesses(bool UseDeferred) {
// We process the set twice: first we process read-write pointers, last we
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
for (PtrAccessSet::iterator AI = S.begin(), AE = S.end(); AI != AE; ++AI) {
const MemAccessInfo &Access = *AI;
Value *Ptr = Access.getPointer();
bool IsWrite = Access.getInt();
DepCands.insert(Access);
// Memorize read-only pointers for later processing and skip them in the
// first round (they need to be checked after we have seen all write
// pointers). Note: we also mark pointers that are not consecutive as
// "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need the
// second check for "!IsWrite".
bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
if (!UseDeferred && IsReadOnlyPtr) {
DeferredAccesses.insert(Access);
continue;
}
bool NeedDepCheck = false;
// Check whether there is the possibility of a dependency because the
// underlying objects are the same.
typedef SmallVector<Value*, 16> ValueVector;
ValueVector TempObjects;
GetUnderlyingObjects(Ptr, TempObjects, DL);
for (ValueVector::iterator UI = TempObjects.begin(), UE = TempObjects.end();
UI != UE; ++UI) {
Value *UnderlyingObj = *UI;
// If this is a write then it needs to be an identified object. If this is a
// read and all writes (so far) are identified function scope objects we
// don't need an identified underlying object but only an Argument (the
// next write is going to invalidate this assumption if it is
// unidentified).
// This is a micro-optimization for the case where all writes are
// identified and we have one argument pointer.
// Otherwise, we do need a runtime check.
if ((IsWrite && !isFunctionScopeIdentifiedObject(UnderlyingObj)) ||
(!IsWrite && (!AreAllWritesIdentified ||
!isa<Argument>(UnderlyingObj)) &&
!isIdentifiedObject(UnderlyingObj))) {
DEBUG(dbgs() << "LV: Found an unidentified " <<
(IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj <<
"\n");
IsRTCheckNeeded = (IsRTCheckNeeded ||
!isIdentifiedObject(UnderlyingObj) ||
!AreAllReadsIdentified);
if (IsWrite)
AreAllWritesIdentified = false;
if (!IsWrite)
AreAllReadsIdentified = false;
}
// If this is a write, check other reads and writes for conflicts. If
// this is a read, only check other writes for conflicts (but only if there
// is no other write to the ptr - this is an optimization to catch "a[i] =
// a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && WriteObjects.count(UnderlyingObj))
NeedDepCheck = true;
if (IsWrite)
WriteObjects.insert(UnderlyingObj);
// Create sets of pointers connected by shared underlying objects.
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
if (Prev != ObjToLastAccess.end())
DepCands.unionSets(Access, Prev->second);
ObjToLastAccess[UnderlyingObj] = Access;
}
if (NeedDepCheck)
CheckDeps.insert(Access);
}
}
namespace {
/// \brief Checks memory dependences among accesses to the same underlying
/// object to determine whether vectorization is legal or not (and at
/// which vectorization factor).
///
/// This class works under the assumption that we already checked that memory
/// locations with different underlying pointers are "must-not alias".
/// We use the ScalarEvolution framework to symbolically evaluate pairs of
/// access functions. Since we currently don't restructure the loop we can rely
/// on the program order of memory accesses to determine their safety.
/// At the moment we will only deem accesses as safe for:
/// * A negative constant distance assuming program order.
///
/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
/// a[i] = tmp; y = a[i];
///
/// The latter case is safe because later checks guarantee that there can't
/// be a cycle through a phi node (that is, we check that "x" and "y" are not
/// the same variable: a header phi can only be an induction or a reduction, a
/// reduction can't have a memory sink, an induction can't have a memory
/// source). This is important and must not be violated (or we have to
/// resort to checking for cycles through memory).
///
/// * A positive constant distance assuming program order that is bigger
/// than the biggest memory access.
///
/// tmp = a[i] OR b[i] = x
/// a[i+2] = tmp y = b[i+2];
///
/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
///
/// * Zero distances and all accesses have the same size.
///
class MemoryDepChecker {
public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
: SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
ShouldRetryWithRuntimeCheck(false) {}
/// \brief Register the location (instructions are given increasing numbers)
/// of a write access.
void addAccess(StoreInst *SI) {
Value *Ptr = SI->getPointerOperand();
Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
InstMap.push_back(SI);
++AccessIdx;
}
/// \brief Register the location (instructions are given increasing numbers)
/// of a read access.
void addAccess(LoadInst *LI) {
Value *Ptr = LI->getPointerOperand();
Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
InstMap.push_back(LI);
++AccessIdx;
}
/// \brief Check whether the dependencies between the accesses are safe.
///
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
MemAccessInfoSet &CheckDeps);
/// \brief The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
/// \brief In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
private:
ScalarEvolution *SE;
DataLayout *DL;
const Loop *InnermostLoop;
/// \brief Maps access locations (ptr, read/write) to program order.
DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
/// \brief Memory access instructions in program order.
SmallVector<Instruction *, 16> InstMap;
/// \brief The program order index to be used for the next instruction.
unsigned AccessIdx;
// We can access this many bytes in parallel safely.
unsigned MaxSafeDepDistBytes;
/// \brief If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
bool ShouldRetryWithRuntimeCheck;
/// \brief Check whether there is a plausible dependence between the two
/// accesses.
///
/// Access \p A must happen before \p B in program order. The two indices
/// identify the index into the program order map.
///
/// This function checks whether there is a plausible dependence (or the
/// absence of such can't be proved) between the two accesses. If there is a
/// plausible dependence but the dependence distance is bigger than one
/// element access, it records this distance in \p MaxSafeDepDistBytes (if this
/// distance is smaller than any other distance encountered so far).
/// Otherwise, this function returns true signaling a possible dependence.
bool isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx);
/// \brief Check whether the data dependence could prevent store-load
/// forwarding.
bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
};
} // end anonymous namespace
static bool isInBoundsGep(Value *Ptr) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
return GEP->isInBounds();
return false;
}
/// \brief Check whether the access through \p Ptr has a constant stride.
static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
const Loop *Lp) {
const Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non ptr");
// Make sure that the pointer does not point to aggregate types.
const PointerType *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
"\n");
return 0;
}
const SCEV *PtrScev = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
*Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// The address calculation must not wrap. Otherwise, a dependence could be
// inverted.
// An inbounds getelementptr that is an AddRec with a unit stride
// cannot wrap by definition. The unit stride requirement is checked later.
// A getelementptr without an inbounds attribute and unit stride would have
// to access the pointer value "0", which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// Check that the step is constant.
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
" SCEV: " << *PtrScev << "\n");
return 0;
}
int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
const APInt &APStepVal = C->getValue()->getValue();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
return 0;
int64_t StepVal = APStepVal.getSExtValue();
// Strided access.
int64_t Stride = StepVal / Size;
int64_t Rem = StepVal % Size;
if (Rem)
return 0;
// If the SCEV could wrap but we have an inbounds gep with a unit stride we
// know we can't "wrap around the address space". In case of address space
// zero we know that this won't happen without triggering undefined behavior.
if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
Stride != 1 && Stride != -1)
return 0;
return Stride;
}
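// Illustrative strides (values hypothetical): an i32 access a[i] has an
// AddRec step of +4 bytes and an alloc size of 4, so Stride = 4/4 = 1
// (consecutive); a[2*i] has step +8 and Stride = 2; a step of +6 bytes
// leaves a remainder of 2 and returns 0 (not strided).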
bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
unsigned TypeByteSize) {
// If loads occur at a distance that is not a multiple of a feasible vector
// factor, store-load forwarding does not take place.
// Positive dependences might cause trouble because vectorizing them might
// prevent store-load forwarding, making vectorized code run a lot slower.
// a[i] = a[i-3] ^ a[i-8];
// The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
// hence on a typical architecture store-load forwarding does not take
// place. Vectorizing in such cases does not make sense.
// Store-load forwarding distance.
const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
// Maximum vector factor.
unsigned MaxVFWithoutSLForwardIssues = MaxVectorWidth*TypeByteSize;
if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
vf *= 2) {
if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
MaxVFWithoutSLForwardIssues = (vf >>= 1);
break;
}
}
if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
DEBUG(dbgs() << "LV: Distance " << Distance <<
" that could cause a store-load forwarding conflict\n");
return true;
}
if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
MaxVFWithoutSLForwardIssues != MaxVectorWidth*TypeByteSize)
MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
return false;
}
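// Worked example (numbers hypothetical): for 4-byte elements and
// Distance = 12 the forwarding window is 8*4 = 32, and candidate widths
// start at vf = 8 bytes. Since 12 % 8 != 0 and 12/8 < 32, the conflict-free
// width drops to 4 bytes, which is below the 2*TypeByteSize minimum, so the
// function reports a store-load forwarding conflict.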
bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx) {
assert (AIdx < BIdx && "Must pass arguments in program order");
Value *APtr = A.getPointer();
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
return false;
const SCEV *AScev = SE->getSCEV(APtr);
const SCEV *BScev = SE->getSCEV(BPtr);
int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop);
int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop);
const SCEV *Src = AScev;
const SCEV *Sink = BScev;
// If the induction step is negative we have to invert source and sink of the
// dependence.
if (StrideAPtr < 0) {
//Src = BScev;
//Sink = AScev;
std::swap(APtr, BPtr);
std::swap(Src, Sink);
std::swap(AIsWrite, BIsWrite);
std::swap(AIdx, BIdx);
std::swap(StrideAPtr, StrideBPtr);
}
const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
// Need consecutive accesses. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
// the address space.
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
DEBUG(dbgs() << "Non-consecutive pointer access\n");
return true;
}
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return true;
}
Type *ATy = APtr->getType()->getPointerElementType();
Type *BTy = BPtr->getType()->getPointerElementType();
unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
// Negative distances are not plausible dependencies.
const APInt &Val = C->getValue()->getValue();
if (Val.isNegative()) {
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
if (IsTrueDataDependence &&
(couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
ATy != BTy))
return true;
DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
return false;
}
// Write to the same location with the same size.
// Could be improved to assert type sizes are the same (i32 == float, etc).
if (Val == 0) {
if (ATy == BTy)
return false;
DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
return true;
}
assert(Val.isStrictlyPositive() && "Expect a positive value");
// Positive distance bigger than max vectorization factor.
if (ATy != BTy) {
DEBUG(dbgs() <<
"LV: ReadWrite-Write positive dependency with different types\n");
return false;
}
unsigned Distance = (unsigned) Val.getZExtValue();
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
unsigned ForcedUnroll = VectorizationUnroll ? VectorizationUnroll : 1;
// The distance must be bigger than the size needed for a vectorized version
// of the operation and the size of the vectorized operation must not be
// bigger than the current maximum size.
if (Distance < 2*TypeByteSize ||
2*TypeByteSize > MaxSafeDepDistBytes ||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
DEBUG(dbgs() << "LV: Failure because of Positive distance "
<< Val.getSExtValue() << '\n');
return true;
}
MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
Distance : MaxSafeDepDistBytes;
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
if (IsTrueDataDependence &&
couldPreventStoreLoadForward(Distance, TypeByteSize))
return true;
DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
" with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
return false;
}
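// Illustrative positive-distance case (values hypothetical): for i32
// accesses a[i] (write) and a[i+3] (read) the distance is 12 bytes with
// TypeByteSize = 4. 12 >= 2*4 and 12 >= 4 * ForcedUnroll * ForcedFactor
// (both defaulting to 1), so the pair is accepted and MaxSafeDepDistBytes
// becomes 12, allowing a maximum VF of 12/4 = 3 elements.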
bool
MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
MemAccessInfoSet &CheckDeps) {
MaxSafeDepDistBytes = -1U;
while (!CheckDeps.empty()) {
MemAccessInfo CurAccess = *CheckDeps.begin();
// Get the relevant memory access set.
EquivalenceClasses<MemAccessInfo>::iterator I =
AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
// Check accesses within this set.
EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
// Check every access pair.
while (AI != AE) {
CheckDeps.erase(*AI);
EquivalenceClasses<MemAccessInfo>::member_iterator OI = llvm::next(AI);
while (OI != AE) {
// Check every accessing instruction pair in program order.
for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2))
return false;
if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1))
return false;
}
++OI;
}
AI++;
}
}
return true;
}
bool LoopVectorizationLegality::canVectorizeMemory() {
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store *instructions*.
ValueVector Loads;
ValueVector Stores;
// Holds all the different accesses in the loop.
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
MemoryDepChecker DepChecker(SE, DL, TheLoop);
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
// Scan the BB and collect legal loads and stores.
for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
++it) {
// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
if (it->mayReadFromMemory()) {
// Many math library functions read the rounding mode. We will only
// vectorize a loop if it contains known function calls that don't set
// the flag. Therefore, it is safe to ignore this read from memory.
CallInst *Call = dyn_cast<CallInst>(it);
if (Call && getIntrinsicIDForCall(Call, TLI))
continue;
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false;
if (!Ld->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
Loads.push_back(Ld);
DepChecker.addAccess(Ld);
continue;
}
// Save 'store' instructions. Abort if other instructions write to memory.
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) return false;
if (!St->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
Stores.push_back(St);
DepChecker.addAccess(St);
}
} // Next instr.
} // Next block.
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
// Check if we see any stores. If there are no stores, then we don't
// care if the pointers are *restrict*.
if (!Stores.size()) {
DEBUG(dbgs() << "LV: Found a read-only loop!\n");
return true;
}
AccessAnalysis::DepCandidates DependentAccesses;
AccessAnalysis Accesses(DL, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
// for read and once for write, it will only appear once (on the write
// list). This is okay, since we are going to check for conflicts between
// writes and between reads and writes, but not between reads and reads.
ValueSet Seen;
ValueVector::iterator I, IE;
for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
StoreInst *ST = cast<StoreInst>(*I);
Value* Ptr = ST->getPointerOperand();
if (isUniform(Ptr)) {
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
}
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr)) {
++NumReadWrites;
Accesses.addStore(Ptr);
}
}
if (IsAnnotatedParallel) {
DEBUG(dbgs()
<< "LV: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
return true;
}
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
// If we did *not* see this pointer before, insert it to the
// read list. If we *did* see it before, then it is already in
// the read-write list. This allows us to vectorize expressions
// such as A[i] += x, because the address of A[i] is a read-write
// pointer. This only works if the index of A[i] is consecutive.
// If the address of i is unknown (for example A[B[i]]) then we may
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop)) {
++NumReads;
IsReadOnlyPtr = true;
}
Accesses.addLoad(Ptr, IsReadOnlyPtr);
}
// If we write (or read-write) to a single destination and there are no
// other reads in this loop then it is safe to vectorize.
if (NumReadWrites == 1 && NumReads == 0) {
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
return true;
}
// Build dependence sets and check whether we need a runtime pointer bounds
// check.
Accesses.buildDependenceSets();
bool NeedRTCheck = Accesses.isRTCheckNeeded();
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bounds check.
unsigned NumComparisons = 0;
bool CanDoRT = false;
if (NeedRTCheck)
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop);
DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
" pointer comparisons.\n");
// If we only have one set of dependences to check pointers among, we don't
// need a runtime check.
if (NumComparisons == 0 && NeedRTCheck)
NeedRTCheck = false;
// Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
CanDoRT = false;
}
if (CanDoRT) {
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
}
if (NeedRTCheck && !CanDoRT) {
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
PtrRtCheck.reset();
return false;
}
PtrRtCheck.Need = NeedRTCheck;
bool CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
DEBUG(dbgs() << "LV: Checking memory dependencies\n");
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
Accesses.getDependenciesToCheck());
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
DEBUG(dbgs() << "LV: Retrying with memory checks\n");
NeedRTCheck = true;
// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks();
PtrRtCheck.reset();
PtrRtCheck.Need = true;
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
TheLoop, true);
// Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
return false;
}
CanVecMem = true;
}
}
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
return CanVecMem;
}
static bool hasMultipleUsesOf(Instruction *I,
SmallPtrSet<Instruction *, 8> &Insts) {
unsigned NumUses = 0;
for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
if (Insts.count(dyn_cast<Instruction>(*Use)))
++NumUses;
if (NumUses > 1)
return true;
}
return false;
}
static bool areAllUsesIn(Instruction *I, SmallPtrSet<Instruction *, 8> &Set) {
for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
if (!Set.count(dyn_cast<Instruction>(*Use)))
return false;
return true;
}
bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
ReductionKind Kind) {
if (Phi->getNumIncomingValues() != 2)
return false;
// Reduction variables are only found in the loop header block.
if (Phi->getParent() != TheLoop->getHeader())
return false;
// Obtain the reduction start value from the value that comes from the loop
// preheader.
Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
// ExitInstruction is the single value which is used outside the loop.
// We only allow for a single reduction value to be used outside the loop.
// This includes users of the reduction variables (which form a cycle
// that ends in the phi node).
Instruction *ExitInstruction = 0;
// Indicates that we found a reduction operation in our scan.
bool FoundReduxOp = false;
// We start with the PHI node and scan for all of the users of this
// instruction. All users must be instructions that can be used as reduction
// variables (such as ADD). We must have a single out-of-block user. The cycle
// must include the original PHI.
bool FoundStartPHI = false;
// To recognize min/max patterns formed by an icmp/select sequence, we store
// the number of instructions we saw from the recognized min/max pattern,
// to make sure we see exactly the two expected instructions.
unsigned NumCmpSelectPatternInst = 0;
ReductionInstDesc ReduxDesc(false, 0);
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(Phi);
VisitedInsts.insert(Phi);
// A value in the reduction can be used:
// - By the reduction:
// - Reduction operation:
// - One use of reduction value (safe).
// - Multiple use of reduction value (not safe).
// - PHI:
// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.
// - By one instruction outside of the loop (safe).
// - By further instructions outside of the loop (not safe).
// - By an instruction that is not part of the reduction (not safe).
// This is either:
// * An instruction type other than PHI or the reduction operation.
// * A PHI in the header other than the initial PHI.
while (!Worklist.empty()) {
Instruction *Cur = Worklist.back();
Worklist.pop_back();
// No Users.
// If the instruction has no users then this is a broken chain and can't be
// a reduction variable.
if (Cur->use_empty())
return false;
bool IsAPhi = isa<PHINode>(Cur);
// A header PHI use other than the original PHI.
if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
return false;
// Reductions of instructions such as Div and Sub are only possible if the
// LHS is the reduction variable.
if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
!isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
!VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
return false;
// Any reduction instruction must be of one of the allowed kinds.
ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
if (!ReduxDesc.IsReduction)
return false;
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
hasMultipleUsesOf(Cur, VisitedInsts))
return false;
// All inputs to a PHI node must be a reduction value.
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
// Check whether we found a reduction operator.
FoundReduxOp |= !IsAPhi;
// Process users of the current instruction. Push non-PHI nodes after PHI nodes
// onto the stack. This way we are going to have seen all inputs to PHI
// nodes once we get to them.
SmallVector<Instruction *, 8> NonPHIs;
SmallVector<Instruction *, 8> PHIs;
for (Value::use_iterator UI = Cur->use_begin(), E = Cur->use_end(); UI != E;
++UI) {
Instruction *Usr = cast<Instruction>(*UI);
// Check if we found the exit user.
BasicBlock *Parent = Usr->getParent();
if (!TheLoop->contains(Parent)) {
// Exit if you find multiple outside users or if the header phi node is
// being used. In this case the user uses the value of the previous
// iteration, in which case we would lose "VF-1" iterations of the
// reduction operation if we vectorize.
if (ExitInstruction != 0 || Cur == Phi)
return false;
// The instruction used by an outside user must be the last instruction
// before we feed back to the reduction phi. Otherwise, we lose VF-1
// operations on the value.
if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
return false;
ExitInstruction = Cur;
continue;
}
- // Process instructions only once (termination).
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ ReductionInstDesc IgnoredVal(false, 0);
if (VisitedInsts.insert(Usr)) {
if (isa<PHINode>(Usr))
PHIs.push_back(Usr);
else
NonPHIs.push_back(Usr);
- }
+ } else if (!isa<PHINode>(Usr) &&
+ ((!isa<FCmpInst>(Usr) &&
+ !isa<ICmpInst>(Usr) &&
+ !isa<SelectInst>(Usr)) ||
+ !isMinMaxSelectCmpPattern(Usr, IgnoredVal).IsReduction))
+ return false;
+
// Remember that we completed the cycle.
if (Usr == Phi)
FoundStartPHI = true;
}
Worklist.append(PHIs.begin(), PHIs.end());
Worklist.append(NonPHIs.begin(), NonPHIs.end());
}
// This means we have seen one but not the other instruction of the
// pattern, or more than just a select and cmp.
if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
NumCmpSelectPatternInst != 2)
return false;
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.
// This instruction is allowed to have out-of-loop users.
AllowedExit.insert(ExitInstruction);
// Save the description of this reduction variable.
ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
ReduxDesc.MinMaxKind);
Reductions[Phi] = RD;
// We've ended the cycle. This is a reduction variable if we have an
// outside user and it has a binary op.
return true;
}
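// A minimal shape of the accepted pattern, in illustrative IR:
//   %sum = phi i32 [ 0, %preheader ], [ %sum.next, %loop ]
//   %v = load i32* %p
//   %sum.next = add i32 %sum, %v       ; the single reduction operation
// where %sum.next is the only value used after the loop (ExitInstruction)
// and the use chain cycles back to the starting phi (FoundStartPHI).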
/// Recognizes a Select(ICmp(X, Y), X, Y) instruction pattern corresponding
/// to a min(X, Y) or max(X, Y), and reports in the returned
/// ReductionInstDesc whether the pattern matched and which kind it is.
LoopVectorizationLegality::ReductionInstDesc
LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
ReductionInstDesc &Prev) {
assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
"Expect a select instruction");
Instruction *Cmp = 0;
SelectInst *Select = 0;
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
return ReductionInstDesc(false, I);
return ReductionInstDesc(Select, Prev.MinMaxKind);
}
// Only handle single use cases for now.
if (!(Select = dyn_cast<SelectInst>(I)))
return ReductionInstDesc(false, I);
if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
!(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
return ReductionInstDesc(false, I);
if (!Cmp->hasOneUse())
return ReductionInstDesc(false, I);
Value *CmpLeft;
Value *CmpRight;
// Look for a min/max pattern.
if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_UIntMin);
else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_UIntMax);
else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_SIntMax);
else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_SIntMin);
else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_FloatMin);
else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_FloatMax);
else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_FloatMin);
else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_FloatMax);
return ReductionInstDesc(false, I);
}
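// For example, this cmp/select pair (names illustrative) matches m_SMax and
// is classified as MRK_SIntMax:
//   %c = icmp sgt i32 %a, %b
//   %m = select i1 %c, i32 %a, i32 %b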
LoopVectorizationLegality::ReductionInstDesc
LoopVectorizationLegality::isReductionInstr(Instruction *I,
ReductionKind Kind,
ReductionInstDesc &Prev) {
bool FP = I->getType()->isFloatingPointTy();
bool FastMath = (FP && I->isCommutative() && I->isAssociative());
switch (I->getOpcode()) {
default:
return ReductionInstDesc(false, I);
case Instruction::PHI:
if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
Kind != RK_FloatMinMax))
return ReductionInstDesc(false, I);
return ReductionInstDesc(I, Prev.MinMaxKind);
case Instruction::Sub:
case Instruction::Add:
return ReductionInstDesc(Kind == RK_IntegerAdd, I);
case Instruction::Mul:
return ReductionInstDesc(Kind == RK_IntegerMult, I);
case Instruction::And:
return ReductionInstDesc(Kind == RK_IntegerAnd, I);
case Instruction::Or:
return ReductionInstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
return ReductionInstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FAdd:
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
if (Kind != RK_IntegerMinMax &&
(!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
return ReductionInstDesc(false, I);
return isMinMaxSelectCmpPattern(I, Prev);
}
}
LoopVectorizationLegality::InductionKind
LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer induction variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
return IK_NoInduction;
// Check that the PHI is consecutive.
const SCEV *PhiScev = SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
if (!AR) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return IK_NoInduction;
}
const SCEV *Step = AR->getStepRecurrence(*SE);
// Integer inductions need to have a stride of one.
if (PhiTy->isIntegerTy()) {
if (Step->isOne())
return IK_IntInduction;
if (Step->isAllOnesValue())
return IK_ReverseIntInduction;
return IK_NoInduction;
}
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C)
return IK_NoInduction;
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
if (C->getValue()->equalsInt(Size))
return IK_PtrInduction;
else if (C->getValue()->equalsInt(0 - Size))
return IK_ReversePtrInduction;
return IK_NoInduction;
}
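// Illustrative classifications: an i32 phi stepping by +1 is
// IK_IntInduction and one stepping by -1 is IK_ReverseIntInduction; an i32*
// phi whose constant step equals getTypeAllocSize(i32) = 4 bytes is
// IK_PtrInduction, and a step of -4 is IK_ReversePtrInduction.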
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
Value *In0 = const_cast<Value*>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
if (!PN)
return false;
return Inductions.count(PN);
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
assert(TheLoop->contains(BB) && "Unknown block used");
// Blocks that do not dominate the latch need predication.
BasicBlock* Latch = TheLoop->getLoopLatch();
return !DT->dominates(BB, Latch);
}
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
SmallPtrSet<Value *, 8>& SafePtrs) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// We might be able to hoist the load.
if (it->mayReadFromMemory()) {
LoadInst *LI = dyn_cast<LoadInst>(it);
if (!LI || !SafePtrs.count(LI->getPointerOperand()))
return false;
}
// We don't predicate stores at the moment.
if (it->mayWriteToMemory() || it->mayThrow())
return false;
// Check that we don't have a constant expression that can trap as an operand.
for (Instruction::op_iterator OI = it->op_begin(), OE = it->op_end();
OI != OE; ++OI) {
if (Constant *C = dyn_cast<Constant>(*OI))
if (C->canTrap())
return false;
}
// The instructions below can trap.
switch (it->getOpcode()) {
default: continue;
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
return false;
}
}
return true;
}
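// For instance, a block containing "%q = sdiv i32 %x, %y" (illustrative)
// can never be predicated: executing the division unconditionally could
// trap when %y is zero on an iteration the mask would have skipped.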
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
unsigned UserVF) {
// Width 1 means no vectorization.
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
return Factor;
}
// Find the trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
unsigned WidestType = getWidestType();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
unsigned MaxSafeDepDist = -1U;
if (Legal->getMaxSafeDepDistBytes() != -1U)
MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
DEBUG(dbgs() << "LV: The Widest register is: "
<< WidestRegister << " bits.\n");
if (MaxVectorSize == 0) {
DEBUG(dbgs() << "LV: The target has no vector registers.\n");
MaxVectorSize = 1;
}
assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
" into one vector!");
unsigned VF = MaxVectorSize;
// If we optimize the program for size, avoid creating the tail loop.
if (OptForSize) {
// If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) {
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
// Find the maximum SIMD width that can fit within the trip count.
VF = TC % MaxVectorSize;
if (VF == 0)
VF = MaxVectorSize;
// If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail.
if (VF < 2) {
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
}
if (UserVF != 0) {
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
Factor.Width = UserVF;
return Factor;
}
float Cost = expectedCost(1);
unsigned Width = 1;
DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
for (unsigned i = 2; i <= VF; i *= 2) {
// Notice that the vector loop needs to be executed fewer times, so
// we need to divide the cost of the vector loop by the width of
// the vector elements.
float VectorCost = expectedCost(i) / (float)i;
DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
(int)VectorCost << ".\n");
if (VectorCost < Cost) {
Cost = VectorCost;
Width = i;
}
}
DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
Factor.Width = Width;
Factor.Cost = Width * Cost;
return Factor;
}
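// Worked selection (costs hypothetical): if the scalar loop costs 20 and
// the VF = 4 body costs 48, the per-lane vector cost is 48/4 = 12 < 20, so
// width 4 is selected and Factor.Cost is 4 * 12 = 48.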
unsigned LoopVectorizationCostModel::getWidestType() {
unsigned MaxWidth = 8;
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
BasicBlock *BB = *bb;
// For each instruction in the loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
Type *T = it->getType();
// Only examine Loads, Stores and PHINodes.
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
continue;
// Examine PHI nodes that are reduction variables.
if (PHINode *PN = dyn_cast<PHINode>(it))
if (!Legal->getReductionVars()->count(PN))
continue;
// Examine the stored values.
if (StoreInst *ST = dyn_cast<StoreInst>(it))
T = ST->getValueOperand()->getType();
// Ignore loaded pointer types and stored pointer types that are not
// consecutive. However, we do want to take consecutive stores/loads of
// pointer vectors into account.
if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
continue;
MaxWidth = std::max(MaxWidth,
(unsigned)DL->getTypeSizeInBits(T->getScalarType()));
}
}
return MaxWidth;
}
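// Illustrative result: a loop that loads i8 values but stores i32 results
// reports a widest type of 32 bits, so a 128-bit register gives the caller
// MaxVectorSize = 128/32 = 4.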
unsigned
LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
unsigned UserUF,
unsigned VF,
unsigned LoopCost) {
// -- The unroll heuristics --
// We unroll the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
// at this level. For example frontend pressure (on decode or fetch) due to
// code size, or the number and capabilities of the execution ports.
//
// We use the following heuristics to select the unroll factor:
// 1. If the code has reductions then we unroll in order to break the cross
// iteration dependency.
// 2. If the loop is really small then we unroll in order to reduce the loop
// overhead.
// 3. We don't unroll if we think that we will spill registers to memory due
// to the increased register pressure.
// Use the user preference, unless 'auto' is selected.
if (UserUF != 0)
return UserUF;
// When we optimize for size we don't unroll.
if (OptForSize)
return 1;
// The maximum safe dependence distance was already used to limit the
// vectorization factor; unrolling would widen the range of bytes accessed
// per iteration beyond that distance, so do not unroll.
if (Legal->getMaxSafeDepDistBytes() != -1U)
return 1;
// Do not unroll loops with a relatively small trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop,
TheLoop->getLoopLatch());
if (TC > 1 && TC < TinyTripCountUnrollThreshold)
return 1;
unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true);
DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters <<
" vector registers\n");
LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
// We divide by these values below, so clamp both to at least one: assume
// at least one instruction that uses at least one register.
R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
R.NumInstructions = std::max(R.NumInstructions, 1U);
// We calculate the unroll factor using the following formula.
// Subtract the number of loop invariants from the number of available
// registers. These registers are used by all of the unrolled instances.
// Next, divide the remaining registers by the number of registers that is
// required by the loop, in order to estimate how many parallel instances
// fit without causing spills.
unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
// Clamp the unroll factor ranges to reasonable factors.
unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0)
LoopCost = expectedCost(VF);
// Clamp the calculated UF to be between 1 and the max unroll factor
// that the target allows.
if (UF > MaxUnrollSize)
UF = MaxUnrollSize;
else if (UF < 1)
UF = 1;
bool HasReductions = Legal->getReductionVars()->size();
// Decide whether we want to unroll when we have decided that it is legal
// to vectorize but not profitable.
if (VF == 1) {
if (TheLoop->getNumBlocks() > 1 || !HasReductions ||
LoopCost > SmallLoopCost)
return 1;
return UF;
}
if (HasReductions) {
DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
return UF;
}
// We want to unroll tiny loops in order to reduce the loop overhead.
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and unroll until the cost of the
// loop overhead is about 5% of the cost of the loop.
DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
if (LoopCost < SmallLoopCost) {
DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
unsigned NewUF = SmallLoopCost / (LoopCost + 1);
return std::min(NewUF, UF);
}
DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
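// Illustrative instance of the register-pressure formula (numbers
// hypothetical): with 16 target vector registers, 2 loop-invariant values
// and a peak of 5 simultaneously live in-loop values,
// UF = (16 - 2) / 5 = 2, subject to the target's maximum unroll factor.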
LoopVectorizationCostModel::RegisterUsage
LoopVectorizationCostModel::calculateRegisterUsage() {
// This function calculates the register usage by measuring the highest number
// of values that are alive at a single location. Obviously, this is a very
// rough estimation. We scan the loop in topological order and
// assign a number to each instruction. We use RPO to ensure that defs are
// met before their users. We assume that each instruction that has in-loop
// users starts an interval. We record every time that an in-loop value is
// used, so we have a list of the first and last occurrences of each
// instruction. Next, we transpose this data structure into a multi map that
// holds the list of intervals that *end* at a specific location. This multi
// map allows us to perform a linear search. We scan the instructions linearly
// and record each time that a new interval starts, by placing it in a set.
// If we find this value in the multi-map then we remove it from the set.
// The max register usage is the maximum size of the set.
// We also search for instructions that are defined outside the loop, but are
// used inside the loop. We need this number separately from the max-interval
// usage number because when we unroll, loop-invariant values do not take
// more registers.
LoopBlocksDFS DFS(TheLoop);
DFS.perform(LI);
RegisterUsage R;
R.NumInstructions = 0;
// Each 'key' in the map opens a new interval. The values
// of the map are the index of the 'last seen' usage of the
// instruction that is the key.
typedef DenseMap<Instruction*, unsigned> IntervalMap;
// Maps an index to its instruction.
DenseMap<unsigned, Instruction*> IdxToInstr;
// Marks the end of each interval.
IntervalMap EndPoint;
// Saves the set of instructions that are used in the loop.
SmallSet<Instruction*, 8> Ends;
// Saves the list of values that are used in the loop but are
// defined outside the loop, such as arguments and constants.
SmallPtrSet<Value*, 8> LoopInvariants;
unsigned Index = 0;
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
be = DFS.endRPO(); bb != be; ++bb) {
R.NumInstructions += (*bb)->size();
for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
++it) {
Instruction *I = it;
IdxToInstr[Index++] = I;
// Save the end location of each USE.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Value *U = I->getOperand(i);
Instruction *Instr = dyn_cast<Instruction>(U);
// Ignore non-instruction values such as arguments, constants, etc.
if (!Instr) continue;
// If this instruction is outside the loop then record it and continue.
if (!TheLoop->contains(Instr)) {
LoopInvariants.insert(Instr);
continue;
}
// Overwrite previous end points.
EndPoint[Instr] = Index;
Ends.insert(Instr);
}
}
}
// Saves the list of intervals that end with the index in 'key'.
typedef SmallVector<Instruction*, 2> InstrList;
DenseMap<unsigned, InstrList> TransposeEnds;
// Transpose the EndPoints to a list of values that end at each index.
for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end();
it != e; ++it)
TransposeEnds[it->second].push_back(it->first);
SmallSet<Instruction*, 8> OpenIntervals;
unsigned MaxUsage = 0;
DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
for (unsigned int i = 0; i < Index; ++i) {
Instruction *I = IdxToInstr[i];
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
for (unsigned int j=0, e = List.size(); j < e; ++j)
OpenIntervals.erase(List[j]);
// Count the number of live intervals.
MaxUsage = std::max(MaxUsage, OpenIntervals.size());
DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
unsigned Invariant = LoopInvariants.size();
DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
R.LoopInvariantRegs = Invariant;
R.MaxLocalUsers = MaxUsage;
return R;
}
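// Sketch of the interval accounting (indices hypothetical): if %a is
// defined at #0 and %b at #1, both last used at #3, then when the
// in-loop-used instruction at #2 is scanned both intervals are still open,
// so MaxLocalUsers is at least 2; values defined before the loop only
// grow LoopInvariantRegs.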
unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned Cost = 0;
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
unsigned BlockCost = 0;
BasicBlock *BB = *bb;
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// Skip dbg intrinsics.
if (isa<DbgInfoIntrinsic>(it))
continue;
unsigned C = getInstructionCost(it, VF);
BlockCost += C;
DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
VF << " For instruction: " << *it << '\n');
}
// We assume that if-converted blocks have a 50% chance of being executed.
// When the code is scalar then some of the blocks are avoided due to CF.
// When the code is vectorized we execute all code paths.
if (VF == 1 && Legal->blockNeedsPredication(*bb))
BlockCost /= 2;
Cost += BlockCost;
}
return Cost;
}
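// Illustrative total (costs hypothetical): a body of cost 16 plus an
// if-converted block of cost 8 is estimated at 16 + 8/2 = 20 when VF == 1,
// while vector code is charged the whole block cost, since all paths
// execute after if-conversion.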
/// \brief Check whether the address computation for a non-consecutive memory
/// access looks like an unlikely candidate for being merged into the indexing
/// mode.
///
/// We look for a GEP which has one index that is an induction variable and all
/// other indices are loop invariant. If the stride of this access is also
/// within a small bound we decide that this address computation can likely be
/// merged into the addressing mode.
/// In all other cases, we identify the address computation as complex.
static bool isLikelyComplexAddressComputation(Value *Ptr,
LoopVectorizationLegality *Legal,
ScalarEvolution *SE,
const Loop *TheLoop) {
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
return true;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
unsigned NumOperands = Gep->getNumOperands();
for (unsigned i = 1; i < NumOperands; ++i) {
Value *Opd = Gep->getOperand(i);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
return true;
}
// Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
// can likely be merged into the address computation.
unsigned MaxMergeDistance = 64;
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
if (!AddRec)
return true;
// Check that the step is constant.
const SCEV *Step = AddRec->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C)
return true;
const APInt &APStepVal = C->getValue()->getValue();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
return true;
int64_t StepVal = APStepVal.getSExtValue();
return StepVal > MaxMergeDistance;
}
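// For instance (illustrative IR), "%p = getelementptr double* %b, i64 %i"
// with %i the induction variable steps by 8 bytes per iteration, well under
// MaxMergeDistance, so the address computation is considered cheap; an
// index that is neither loop invariant nor an induction variable makes it
// "complex".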
unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// If we know that this instruction will remain uniform, check the cost of
// the scalar version.
if (Legal->isUniformAfterVectorization(I))
VF = 1;
Type *RetTy = I->getType();
Type *VectorTy = ToVectorTy(RetTy, VF);
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because the cost of GEPs in
// vectorized code depends on whether the corresponding memory instruction
// is scalarized or not. Therefore, we handle GEPs with the memory
// instruction cost.
return 0;
case Instruction::Br: {
return TTI.getCFInstrCost(I->getOpcode());
}
case Instruction::PHI:
// TODO: If-converted IFs become selects.
return 0;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this is shifts on x86.
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_AnyValue;
if (isa<ConstantInt>(I->getOperand(1)))
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Store:
case Instruction::Load: {
StoreInst *SI = dyn_cast<StoreInst>(I);
LoadInst *LI = dyn_cast<LoadInst>(I);
Type *ValTy = (SI ? SI->getValueOperand()->getType() :
LI->getType());
VectorTy = ToVectorTy(ValTy, VF);
unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
unsigned AS = SI ? SI->getPointerAddressSpace() :
LI->getPointerAddressSpace();
Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
// We add the cost of address computation here instead of with the gep
// instruction because only here do we know whether the operation is
// scalarized.
if (VF == 1)
return TTI.getAddressComputationCost(VectorTy) +
TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
bool IsComplexComputation =
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
for (unsigned i = 0; i < VF; ++i) {
// The cost of extracting the pointer operand.
Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
// In case of STORE, the cost of ExtractElement from the vector.
// In case of LOAD, the cost of InsertElement into the returned
// vector.
Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement :
Instruction::InsertElement,
VectorTy, i);
}
// The cost of the scalar loads/stores.
Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
return Cost;
}
// Wide loads/stores.
unsigned Cost = TTI.getAddressComputationCost(VectorTy);
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
VectorTy, 0);
return Cost;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
// We optimize the truncation of induction variables.
// The cost of these is the same as the scalar operation.
if (I->getOpcode() == Instruction::Trunc &&
Legal->isInductionVariable(I->getOperand(0)))
return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
I->getOperand(0)->getType());
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(I);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
Type *RetTy = ToVectorTy(CI->getType(), VF);
SmallVector<Type*, 4> Tys;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar
// instruction, plus the cost of insert and extract into vector
// elements, times the vector width.
unsigned Cost = 0;
if (!RetTy->isVoidTy() && VF != 1) {
unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement,
VectorTy);
unsigned ExtCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
VectorTy);
// The cost of inserting the results plus extracting each one of the
// operands.
Cost += VF * (InsCost + ExtCost * I->getNumOperands());
}
// The cost of executing VF copies of the scalar instruction. This opcode
// is unknown. Assume that it is the same as 'mul'.
Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
return Cost;
}
}// end of switch.
}
Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
if (Scalar->isVoidTy() || VF == 1)
return Scalar;
return VectorType::get(Scalar, VF);
}
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
Pass *createLoopVectorizePass(bool NoUnrolling) {
return new LoopVectorize(NoUnrolling);
}
}
bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
// Check for a store.
if (StoreInst *ST = dyn_cast<StoreInst>(Inst))
return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
// Check for a load.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
return false;
}
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters, or scalars in the case of uniform values.
SmallVector<VectorParts, 4> Params;
setDebugLocFromInst(Builder, Instr);
// Find all of the vectorized parameters.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *SrcOp = Instr->getOperand(op);
// If we are accessing the old induction variable, use the new one.
if (SrcOp == OldInduction) {
Params.push_back(getVectorValue(SrcOp));
continue;
}
// Try using previously calculated values.
Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
// If the src is an instruction that appeared earlier in the basic block
// then it should already be vectorized.
if (SrcInst && OrigLoop->contains(SrcInst)) {
assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
// The parameter is a vector value from earlier.
Params.push_back(WidenMap.get(SrcInst));
} else {
// The parameter is a scalar from outside the loop. Maybe even a constant.
VectorParts Scalars;
Scalars.append(UF, SrcOp);
Params.push_back(Scalars);
}
}
assert(Params.size() == Instr->getNumOperands() &&
"Invalid number of operands");
// Does this instruction return a value?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
Value *UndefVec = IsVoidRetTy ? 0 :
UndefValue::get(Instr->getType());
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
// For each scalar that we create:
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
// Replace the operands of the cloned instructions with extracted scalars.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *Op = Params[op][Part];
Cloned->setOperand(op, Op);
}
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
// If the original scalar returns a value, we need to place it in a vector
// so that future users will be able to use it.
if (!IsVoidRetTy)
VecResults[Part] = Cloned;
}
}
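// A toy analogue of the cloning loop above (plain ints, no LLVM types):
// with UF unroll parts, the scalar operation is cloned once per part and
// each clone is wired to that part's operand values, mirroring
// Params[op][Part] above.
static void toyUnrollAdd(const int *APart, const int *BPart, int *Results,
                         unsigned UF) {
  for (unsigned Part = 0; Part < UF; ++Part)
    Results[Part] = APart[Part] + BPart[Part]; // one "clone" per part
}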
void
InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality*) {
return scalarizeInstruction(Instr);
}
Value *InnerLoopUnroller::reverseVector(Value *Vec) {
return Vec;
}
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
return V;
}
Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *ITy = Val->getType();
assert(!ITy->isVectorTy() && "Val must be a scalar");
Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
return Builder.CreateAdd(Val, C, "induction");
}
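// A plain-integer analogue of the VF == 1 case above (toy code, not LLVM):
// the "consecutive vector" degenerates to a single scalar add. Note that
// Negate lands in the IsSigned position of ConstantInt::get(), so a
// negative StartIdx is sign-extended to the full width of ITy.
static long long toyConsecutiveScalar(long long Val, int StartIdx) {
  return Val + StartIdx; // the "induction" update built above
}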
Index: head/contrib/llvm/patches/patch-r262809-clang-r203007-destructor-calling-conv.diff
===================================================================
--- head/contrib/llvm/patches/patch-r262809-clang-r203007-destructor-calling-conv.diff (revision 265924)
+++ head/contrib/llvm/patches/patch-r262809-clang-r203007-destructor-calling-conv.diff (nonexistent)
@@ -1,61 +0,0 @@
-Pull in r203007 from upstream clang trunk (by Rafael Espindola):
-
- Don't produce an alias between destructors with different calling conventions.
-
- Fixes pr19007.
-
-Introduced here: http://svn.freebsd.org/changeset/base/262809
-
-Index: tools/clang/lib/CodeGen/CGCXX.cpp
-===================================================================
---- tools/clang/lib/CodeGen/CGCXX.cpp
-+++ tools/clang/lib/CodeGen/CGCXX.cpp
-@@ -92,7 +92,13 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(c
- if (!ClassLayout.getBaseClassOffset(UniqueBase).isZero())
- return true;
-
-+ // Give up if the calling conventions don't match. We could update the call,
-+ // but it is probably not worth it.
- const CXXDestructorDecl *BaseD = UniqueBase->getDestructor();
-+ if (BaseD->getType()->getAs<FunctionType>()->getCallConv() !=
-+ D->getType()->getAs<FunctionType>()->getCallConv())
-+ return true;
-+
- return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
- GlobalDecl(BaseD, Dtor_Base),
- false);
-Index: tools/clang/test/CodeGenCXX/ctor-dtor-alias.cpp
-===================================================================
---- tools/clang/test/CodeGenCXX/ctor-dtor-alias.cpp
-+++ tools/clang/test/CodeGenCXX/ctor-dtor-alias.cpp
-@@ -1,5 +1,5 @@
--// RUN: %clang_cc1 %s -triple x86_64-linux -emit-llvm -o - -mconstructor-aliases -O1 -disable-llvm-optzns | FileCheck %s
--// RUN: %clang_cc1 %s -triple x86_64-linux -emit-llvm -o - -mconstructor-aliases | FileCheck --check-prefix=NOOPT %s
-+// RUN: %clang_cc1 %s -triple i686-linux -emit-llvm -o - -mconstructor-aliases -O1 -disable-llvm-optzns | FileCheck %s
-+// RUN: %clang_cc1 %s -triple i686-linux -emit-llvm -o - -mconstructor-aliases | FileCheck --check-prefix=NOOPT %s
-
- // RUN: %clang_cc1 -cc1 -triple x86_64--netbsd -emit-llvm \
- // RUN: -mconstructor-aliases -O2 %s -o - | FileCheck --check-prefix=CHECK-RAUW %s
-@@ -133,6 +133,22 @@ namespace test8 {
- zed foo;
- }
-
-+namespace test9 {
-+struct foo {
-+ __attribute__((stdcall)) ~foo() {
-+ }
-+};
-+
-+struct bar : public foo {};
-+
-+void zed() {
-+ // Test that we produce a call to bar's destructor. We used to call foo's, but
-+ // it has a different calling conversion.
-+ // CHECK-DAG: call void @_ZN5test93barD2Ev
-+ bar ptr;
-+}
-+}
-+
- // CHECK-RAUW: @_ZTV1C = linkonce_odr unnamed_addr constant [4 x i8*] [{{[^@]*}}@_ZTI1C {{[^@]*}}@_ZN1CD2Ev {{[^@]*}}@_ZN1CD0Ev {{[^@]*}}]
- // r194296 replaced C::~C with B::~B without emitting the later.
-
Property changes on: head/contrib/llvm/patches/patch-r262809-clang-r203007-destructor-calling-conv.diff
___________________________________________________________________
Deleted: svn:mime-type
## -1 +0,0 ##
-text/x-diff
\ No newline at end of property
Index: head/contrib/llvm/tools/clang/include/clang/Driver/Driver.h
===================================================================
--- head/contrib/llvm/tools/clang/include/clang/Driver/Driver.h (revision 265924)
+++ head/contrib/llvm/tools/clang/include/clang/Driver/Driver.h (revision 265925)
@@ -1,421 +1,425 @@
//===--- Driver.h - Clang GCC Compatible Driver -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_DRIVER_DRIVER_H_
#define CLANG_DRIVER_DRIVER_H_
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LLVM.h"
#include "clang/Driver/Phases.h"
#include "clang/Driver/Types.h"
#include "clang/Driver/Util.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Path.h" // FIXME: Kill when CompilationInfo
// lands.
#include <list>
#include <set>
#include <string>
namespace llvm {
namespace opt {
class Arg;
class ArgList;
class DerivedArgList;
class InputArgList;
class OptTable;
}
}
namespace clang {
namespace driver {
class Action;
class Command;
class Compilation;
class InputInfo;
class JobAction;
class SanitizerArgs;
class ToolChain;
/// Driver - Encapsulate logic for constructing compilation processes
/// from a set of gcc-driver-like command line arguments.
class Driver {
llvm::opt::OptTable *Opts;
DiagnosticsEngine &Diags;
enum DriverMode {
GCCMode,
GXXMode,
CPPMode,
CLMode
} Mode;
public:
// Diag - Forwarding function for diagnostics.
DiagnosticBuilder Diag(unsigned DiagID) const {
return Diags.Report(DiagID);
}
// FIXME: Privatize once interface is stable.
public:
/// The name the driver was invoked as.
std::string Name;
/// The path the driver executable was in, as invoked from the
/// command line.
std::string Dir;
/// The original path to the clang executable.
std::string ClangExecutable;
/// The path to the installed clang directory, if any.
std::string InstalledDir;
/// The path to the compiler resource directory.
std::string ResourceDir;
/// A prefix directory used to emulate a limited subset of GCC's '-Bprefix'
/// functionality.
/// FIXME: This type of customization should be removed in favor of the
/// universal driver when it is ready.
typedef SmallVector<std::string, 4> prefix_list;
prefix_list PrefixDirs;
/// sysroot, if present
std::string SysRoot;
/// Dynamic loader prefix, if present
std::string DyldPrefix;
/// If the standard library is used
bool UseStdLib;
/// Default target triple.
std::string DefaultTargetTriple;
/// Default name for linked images (e.g., "a.out").
std::string DefaultImageName;
/// Driver title to use with help.
std::string DriverTitle;
/// Information about the host which can be overridden by the user.
std::string HostBits, HostMachine, HostSystem, HostRelease;
/// The file to log CC_PRINT_OPTIONS output to, if enabled.
const char *CCPrintOptionsFilename;
/// The file to log CC_PRINT_HEADERS output to, if enabled.
const char *CCPrintHeadersFilename;
/// The file to log CC_LOG_DIAGNOSTICS output to, if enabled.
const char *CCLogDiagnosticsFilename;
/// A list of inputs and their types for the given arguments.
typedef SmallVector<std::pair<types::ID, const llvm::opt::Arg *>, 16>
InputList;
/// Whether the driver should follow g++-like behavior.
bool CCCIsCXX() const { return Mode == GXXMode; }
/// Whether the driver is just the preprocessor.
bool CCCIsCPP() const { return Mode == CPPMode; }
/// Whether the driver should follow cl.exe-like behavior.
bool IsCLMode() const { return Mode == CLMode; }
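// For illustration (hypothetical invocations): "clang++ foo.cpp" selects
// GXXMode, "clang -E foo.c" stays in GCCMode, and "clang-cl /c foo.c"
// selects CLMode. Mode is derived from the --driver-mode= option (see
// ParseDriverMode() below), which the clang tool typically injects based
// on the name it was invoked under.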
/// Only print tool bindings, don't build any jobs.
unsigned CCCPrintBindings : 1;
/// Set CC_PRINT_OPTIONS mode, which is like -v but logs the commands to
/// CCPrintOptionsFilename or to stderr.
unsigned CCPrintOptions : 1;
/// Set CC_PRINT_HEADERS mode, which causes the frontend to log header include
/// information to CCPrintHeadersFilename or to stderr.
unsigned CCPrintHeaders : 1;
/// Set CC_LOG_DIAGNOSTICS mode, which causes the frontend to log diagnostics
/// to CCLogDiagnosticsFilename or to stderr, in a stable machine readable
/// format.
unsigned CCLogDiagnostics : 1;
/// Whether the driver is generating diagnostics for debugging purposes.
unsigned CCGenDiagnostics : 1;
private:
/// Name to use when invoking gcc/g++.
std::string CCCGenericGCCName;
/// Whether to check that input files exist when constructing compilation
/// jobs.
unsigned CheckInputsExist : 1;
public:
/// Use lazy precompiled headers for PCH support.
unsigned CCCUsePCH : 1;
private:
/// Certain options suppress the 'no input files' warning.
bool SuppressMissingInputWarning : 1;
std::list<std::string> TempFiles;
std::list<std::string> ResultFiles;
/// \brief Cache of all the ToolChains in use by the driver.
///
/// This maps from the string representation of a triple to a ToolChain
/// created targeting that triple. The driver owns all the ToolChain objects
/// stored in it, and will clean them up when torn down.
mutable llvm::StringMap<ToolChain *> ToolChains;
private:
/// TranslateInputArgs - Create a new derived argument list from the input
/// arguments, after applying the standard argument translations.
llvm::opt::DerivedArgList *
TranslateInputArgs(const llvm::opt::InputArgList &Args) const;
// getFinalPhase - Determine which compilation mode we are in and record
// which option we used to determine the final phase.
phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL,
llvm::opt::Arg **FinalPhaseArg = 0) const;
public:
Driver(StringRef _ClangExecutable,
StringRef _DefaultTargetTriple,
StringRef _DefaultImageName,
DiagnosticsEngine &_Diags);
~Driver();
/// @name Accessors
/// @{
/// Name to use when invoking gcc/g++.
const std::string &getCCCGenericGCCName() const { return CCCGenericGCCName; }
const llvm::opt::OptTable &getOpts() const { return *Opts; }
const DiagnosticsEngine &getDiags() const { return Diags; }
bool getCheckInputsExist() const { return CheckInputsExist; }
void setCheckInputsExist(bool Value) { CheckInputsExist = Value; }
const std::string &getTitle() { return DriverTitle; }
void setTitle(std::string Value) { DriverTitle = Value; }
/// \brief Get the path to the main clang executable.
const char *getClangProgramPath() const {
return ClangExecutable.c_str();
}
/// \brief Get the path to where the clang executable was installed.
const char *getInstalledDir() const {
if (!InstalledDir.empty())
return InstalledDir.c_str();
return Dir.c_str();
}
void setInstalledDir(StringRef Value) {
InstalledDir = Value;
}
/// @}
/// @name Primary Functionality
/// @{
/// BuildCompilation - Construct a compilation object for a command
/// line argument vector.
///
/// \return A compilation, or 0 if none was built for the given
/// argument vector. A null return value does not necessarily
/// indicate an error condition; the diagnostics should be queried
/// to determine if an error occurred.
Compilation *BuildCompilation(ArrayRef<const char *> Args);
/// @name Driver Steps
/// @{
/// ParseDriverMode - Look for and handle the driver mode option in Args.
void ParseDriverMode(ArrayRef<const char *> Args);
/// ParseArgStrings - Parse the given list of strings into an
/// ArgList.
llvm::opt::InputArgList *ParseArgStrings(ArrayRef<const char *> Args);
/// BuildInputs - Construct the list of inputs and their types from
/// the given arguments.
///
/// \param TC - The default host tool chain.
/// \param Args - The input arguments.
/// \param Inputs - The list to store the resulting compilation
/// inputs onto.
void BuildInputs(const ToolChain &TC, const llvm::opt::DerivedArgList &Args,
InputList &Inputs) const;
/// BuildActions - Construct the list of actions to perform for the
/// given arguments, which are only done for a single architecture.
///
/// \param TC - The default host tool chain.
/// \param Args - The input arguments.
/// \param Actions - The list to store the resulting actions onto.
void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args,
const InputList &Inputs, ActionList &Actions) const;
/// BuildUniversalActions - Construct the list of actions to perform
/// for the given arguments, which may require a universal build.
///
/// \param TC - The default host tool chain.
/// \param Args - The input arguments.
/// \param Actions - The list to store the resulting actions onto.
void BuildUniversalActions(const ToolChain &TC,
llvm::opt::DerivedArgList &Args,
const InputList &BAInputs,
ActionList &Actions) const;
/// BuildJobs - Bind actions to concrete tools and translate
/// arguments to form the list of jobs to run.
///
/// \param C - The compilation that is being built.
void BuildJobs(Compilation &C) const;
/// ExecuteCompilation - Execute the compilation according to the command line
/// arguments and return an appropriate exit code.
///
/// This routine handles additional processing that must be done in addition
/// to just running the subprocesses, for example reporting errors, removing
/// temporary files, etc.
int ExecuteCompilation(const Compilation &C,
SmallVectorImpl< std::pair<int, const Command *> > &FailingCommands) const;
/// generateCompilationDiagnostics - Generate diagnostics information
/// including preprocessed source file(s).
///
void generateCompilationDiagnostics(Compilation &C,
const Command *FailingCommand);
/// @}
/// @name Helper Methods
/// @{
/// PrintActions - Print the list of actions.
void PrintActions(const Compilation &C) const;
/// PrintHelp - Print the help text.
///
/// \param ShowHidden - Show hidden options.
void PrintHelp(bool ShowHidden) const;
/// PrintVersion - Print the driver version.
void PrintVersion(const Compilation &C, raw_ostream &OS) const;
/// GetFilePath - Lookup \p Name in the list of file search paths.
///
/// \param TC - The tool chain for additional information on
/// directories to search.
//
// FIXME: This should be in CompilationInfo.
std::string GetFilePath(const char *Name, const ToolChain &TC) const;
/// GetProgramPath - Lookup \p Name in the list of program search paths.
///
/// \param TC - The provided tool chain for additional information on
/// directories to search.
//
// FIXME: This should be in CompilationInfo.
std::string GetProgramPath(const char *Name, const ToolChain &TC) const;
/// HandleImmediateArgs - Handle any arguments which should be
/// treated before building actions or binding tools.
///
/// \return Whether any compilation should be built for this
/// invocation.
bool HandleImmediateArgs(const Compilation &C);
/// ConstructPhaseAction - Construct the appropriate action to do for
/// \p Phase on the \p Input, taking into account arguments
/// like -fsyntax-only or --analyze.
Action *ConstructPhaseAction(const llvm::opt::ArgList &Args, phases::ID Phase,
Action *Input) const;
/// BuildJobsForAction - Construct the jobs to perform for the
/// action \p A.
void BuildJobsForAction(Compilation &C,
const Action *A,
const ToolChain *TC,
const char *BoundArch,
bool AtTopLevel,
bool MultipleArchs,
const char *LinkingOutput,
InputInfo &Result) const;
/// GetNamedOutputPath - Return the name to use for the output of
/// the action \p JA. The result is appended to the compilation's
/// list of temporary or result files, as appropriate.
///
/// \param C - The compilation.
/// \param JA - The action of interest.
/// \param BaseInput - The original input file that this action was
/// triggered by.
/// \param BoundArch - The bound architecture.
/// \param AtTopLevel - Whether this is a "top-level" action.
/// \param MultipleArchs - Whether multiple -arch options were supplied.
const char *GetNamedOutputPath(Compilation &C,
const JobAction &JA,
const char *BaseInput,
const char *BoundArch,
bool AtTopLevel,
bool MultipleArchs) const;
/// GetTemporaryPath - Return the pathname of a temporary file to use
/// as part of compilation; the file will have the given prefix and suffix.
///
/// GCC goes to extra lengths here to be a bit more robust.
std::string GetTemporaryPath(StringRef Prefix, const char *Suffix) const;
/// ShouldUseClangCompiler - Should the clang compiler be used to
/// handle this action.
bool ShouldUseClangCompiler(const JobAction &JA) const;
bool IsUsingLTO(const llvm::opt::ArgList &Args) const;
private:
/// \brief Retrieves a ToolChain for a particular target triple.
///
/// Will cache ToolChains for the life of the driver object, and create them
/// on-demand.
const ToolChain &getToolChain(const llvm::opt::ArgList &Args,
StringRef DarwinArchName = "") const;
/// @}
/// \brief Get bitmasks for which option flags to include and exclude based on
/// the driver mode.
std::pair<unsigned, unsigned> getIncludeExcludeOptionFlagMasks() const;
public:
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and
/// return the grouped values as integers. Numbers which are not
/// provided are set to 0.
///
/// \return True if the entire string was parsed (9.2), or all
/// groups were parsed (10.3.5extrastuff). HadExtra is true if all
/// groups were parsed but extra characters remain at the end.
static bool GetReleaseVersion(const char *Str, unsigned &Major,
unsigned &Minor, unsigned &Micro,
bool &HadExtra);
};
+/// \return True if the last defined optimization level is -Ofast,
+/// and false otherwise.
+bool isOptimizationLevelFast(const llvm::opt::ArgList &Args);
+
} // end namespace driver
} // end namespace clang
#endif
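// A simplified, standalone sketch of the GetReleaseVersion() contract
// documented above (an sscanf-based toy, not the real implementation):
//   "9.2"              -> Major=9, Minor=2, Micro=0, HadExtra=false
//   "10.3.5extrastuff" -> Major=10, Minor=3, Micro=5, HadExtra=true
#include <cstdio>
static bool toyGetReleaseVersion(const char *Str, unsigned &Major,
                                 unsigned &Minor, unsigned &Micro,
                                 bool &HadExtra) {
  Major = Minor = Micro = 0;
  HadExtra = false;
  int Consumed = 0;
  if (std::sscanf(Str, "%u.%u.%u%n", &Major, &Minor, &Micro, &Consumed) == 3 ||
      std::sscanf(Str, "%u.%u%n", &Major, &Minor, &Consumed) == 2 ||
      std::sscanf(Str, "%u%n", &Major, &Consumed) == 1) {
    HadExtra = Str[Consumed] != '\0';
    return true;
  }
  return false;
}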
Index: head/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h
===================================================================
--- head/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h (revision 265924)
+++ head/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h (revision 265925)
@@ -1,315 +1,315 @@
//===--- ToolChain.h - Collections of tools for one platform ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_DRIVER_TOOLCHAIN_H_
#define CLANG_DRIVER_TOOLCHAIN_H_
#include "clang/Driver/Action.h"
#include "clang/Driver/Types.h"
#include "clang/Driver/Util.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Path.h"
#include <string>
namespace llvm {
namespace opt {
class ArgList;
class DerivedArgList;
class InputArgList;
}
}
namespace clang {
class ObjCRuntime;
namespace driver {
class Compilation;
class Driver;
class JobAction;
class SanitizerArgs;
class Tool;
/// ToolChain - Access to tools for a single platform.
class ToolChain {
public:
typedef SmallVector<std::string, 4> path_list;
enum CXXStdlibType {
CST_Libcxx,
CST_Libstdcxx
};
enum RuntimeLibType {
RLT_CompilerRT,
RLT_Libgcc
};
private:
const Driver &D;
const llvm::Triple Triple;
const llvm::opt::ArgList &Args;
/// The list of toolchain specific path prefixes to search for
/// files.
path_list FilePaths;
/// The list of toolchain specific path prefixes to search for
/// programs.
path_list ProgramPaths;
mutable OwningPtr<Tool> Clang;
mutable OwningPtr<Tool> Assemble;
mutable OwningPtr<Tool> Link;
Tool *getClang() const;
Tool *getAssemble() const;
Tool *getLink() const;
Tool *getClangAs() const;
mutable OwningPtr<SanitizerArgs> SanitizerArguments;
protected:
ToolChain(const Driver &D, const llvm::Triple &T,
const llvm::opt::ArgList &Args);
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
virtual Tool *getTool(Action::ActionClass AC) const;
/// \name Utilities for implementing subclasses.
///@{
static void addSystemInclude(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const Twine &Path);
static void addExternCSystemInclude(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const Twine &Path);
static void
addExternCSystemIncludeIfExists(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const Twine &Path);
static void addSystemIncludes(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
ArrayRef<StringRef> Paths);
///@}
public:
virtual ~ToolChain();
// Accessors
const Driver &getDriver() const;
const llvm::Triple &getTriple() const { return Triple; }
llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
StringRef getArchName() const { return Triple.getArchName(); }
StringRef getPlatform() const { return Triple.getVendorName(); }
StringRef getOS() const { return Triple.getOSName(); }
/// \brief Provide the default architecture name (as expected by -arch) for
/// this toolchain.
std::string getDefaultUniversalArchName() const;
std::string getTripleString() const {
return Triple.getTriple();
}
path_list &getFilePaths() { return FilePaths; }
const path_list &getFilePaths() const { return FilePaths; }
path_list &getProgramPaths() { return ProgramPaths; }
const path_list &getProgramPaths() const { return ProgramPaths; }
const SanitizerArgs& getSanitizerArgs() const;
// Tool access.
/// TranslateArgs - Create a new derived argument list for any argument
/// translations this ToolChain may wish to perform, or 0 if no tool chain
/// specific translations are needed.
///
/// \param BoundArch - The bound architecture name, or 0.
virtual llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args,
const char *BoundArch) const {
return 0;
}
/// Choose a tool to use to handle the action \p JA.
Tool *SelectTool(const JobAction &JA) const;
// Helper methods
std::string GetFilePath(const char *Name) const;
std::string GetProgramPath(const char *Name) const;
/// \brief Dispatch to the specific toolchain for verbose printing.
///
/// This is used when handling the verbose option to print detailed,
/// toolchain-specific information useful for understanding the behavior of
/// the driver on a specific platform.
virtual void printVerboseInfo(raw_ostream &OS) const {}
// Platform defaults information
/// HasNativeLLVMSupport - Check whether the linker and related tools have
/// native LLVM support.
virtual bool HasNativeLLVMSupport() const;
/// LookupTypeForExtension - Return the default language type to use for the
/// given extension.
virtual types::ID LookupTypeForExtension(const char *Ext) const;
/// IsBlocksDefault - Does this tool chain enable -fblocks by default.
virtual bool IsBlocksDefault() const { return false; }
/// IsIntegratedAssemblerDefault - Does this tool chain enable -integrated-as
/// by default.
virtual bool IsIntegratedAssemblerDefault() const { return false; }
/// \brief Check if the toolchain should use the integrated assembler.
bool useIntegratedAs() const;
/// IsMathErrnoDefault - Does this tool chain use -fmath-errno by default.
virtual bool IsMathErrnoDefault() const { return true; }
/// IsEncodeExtendedBlockSignatureDefault - Does this tool chain enable
/// -fencode-extended-block-signature by default.
virtual bool IsEncodeExtendedBlockSignatureDefault() const { return false; }
/// IsObjCNonFragileABIDefault - Does this tool chain set
/// -fobjc-nonfragile-abi by default.
virtual bool IsObjCNonFragileABIDefault() const { return false; }
/// UseObjCMixedDispatchDefault - When using non-legacy dispatch, should the
/// mixed dispatch method be used?
virtual bool UseObjCMixedDispatch() const { return false; }
/// GetDefaultStackProtectorLevel - Get the default stack protector level for
/// this tool chain (0=off, 1=on, 2=all).
virtual unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const {
return 0;
}
/// GetDefaultRuntimeLibType - Get the default runtime library variant to use.
virtual RuntimeLibType GetDefaultRuntimeLibType() const {
return ToolChain::RLT_Libgcc;
}
/// IsUnwindTablesDefault - Does this tool chain use -funwind-tables
/// by default.
virtual bool IsUnwindTablesDefault() const;
/// \brief Test whether this toolchain defaults to PIC.
virtual bool isPICDefault() const = 0;
/// \brief Test whether this toolchain defaults to PIE.
virtual bool isPIEDefault() const = 0;
/// \brief Tests whether this toolchain forces its default for PIC, PIE or
/// non-PIC. If this returns true, any PIC related flags should be ignored
/// and instead the results of \c isPICDefault() and \c isPIEDefault() are
/// used exclusively.
virtual bool isPICDefaultForced() const = 0;
/// SupportsProfiling - Does this tool chain support -pg.
virtual bool SupportsProfiling() const { return true; }
/// Does this tool chain support Objective-C garbage collection.
virtual bool SupportsObjCGC() const { return true; }
/// Complain if this tool chain doesn't support Objective-C ARC.
virtual void CheckObjCARC() const {}
/// UseDwarfDebugFlags - Embed the compile options to clang into the Dwarf
/// compile unit information.
virtual bool UseDwarfDebugFlags() const { return false; }
/// UseSjLjExceptions - Does this tool chain use SjLj exceptions.
virtual bool UseSjLjExceptions() const { return false; }
/// ComputeLLVMTriple - Return the LLVM target triple to use, after taking
/// command line arguments into account.
virtual std::string
ComputeLLVMTriple(const llvm::opt::ArgList &Args,
types::ID InputType = types::TY_INVALID) const;
/// ComputeEffectiveClangTriple - Return the Clang triple to use for this
/// target, which may take into account the command line arguments. For
/// example, on Darwin the -mmacosx-version-min= command line argument (which
/// sets the deployment target) determines the version in the triple passed to
/// Clang.
virtual std::string ComputeEffectiveClangTriple(
const llvm::opt::ArgList &Args,
types::ID InputType = types::TY_INVALID) const;
/// getDefaultObjCRuntime - Return the default Objective-C runtime
/// for this platform.
///
/// FIXME: this really belongs on some sort of DeploymentTarget abstraction
virtual ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const;
/// hasBlocksRuntime - Given that the user is compiling with
/// -fblocks, does this tool chain guarantee the existence of a
/// blocks runtime?
///
/// FIXME: this really belongs on some sort of DeploymentTarget abstraction
virtual bool hasBlocksRuntime() const { return true; }
/// \brief Add the clang cc1 arguments for system include paths.
///
/// This routine is responsible for adding the necessary cc1 arguments to
/// include headers from standard system header directories.
virtual void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
/// \brief Add options that need to be passed to cc1 for this target.
virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
// GetRuntimeLibType - Determine the runtime library type to use with the
// given compilation arguments.
virtual RuntimeLibType
GetRuntimeLibType(const llvm::opt::ArgList &Args) const;
// GetCXXStdlibType - Determine the C++ standard library type to use with the
// given compilation arguments.
virtual CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const;
/// AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set
/// the include paths to use for the given C++ standard library type.
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
/// AddCXXStdlibLibArgs - Add the system specific linker arguments to use
/// for the given C++ standard library type.
virtual void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
/// AddCCKextLibArgs - Add the system specific linker arguments to use
/// for kernel extensions (Darwin-specific).
virtual void AddCCKextLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
/// AddFastMathRuntimeIfAvailable - If a runtime library exists that sets
/// global flags for unsafe floating point math, add it and return true.
///
- /// This checks for presence of the -ffast-math or -funsafe-math flags.
+ /// This checks for presence of the -Ofast, -ffast-math or -funsafe-math flags.
virtual bool
AddFastMathRuntimeIfAvailable(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
};
} // end namespace driver
} // end namespace clang
#endif
Index: head/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp (revision 265925)
@@ -1,2174 +1,2185 @@
//===--- ASTDumper.cpp - Dumping implementation for ASTs ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AST dump methods, which dump out the
// AST in a form that exposes type details and other fields.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/CommentVisitor.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclLookups.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace clang::comments;
//===----------------------------------------------------------------------===//
// ASTDumper Visitor
//===----------------------------------------------------------------------===//
namespace {
// Colors used for various parts of the AST dump
+ // Do not use bold yellow for any text. It is hard to read on white screens.
struct TerminalColor {
raw_ostream::Colors Color;
bool Bold;
};
+ // Red - CastColor
+ // Green - TypeColor
+ // Bold Green - DeclKindNameColor, UndeserializedColor
+ // Yellow - AddressColor, LocationColor
+ // Blue - CommentColor, NullColor, IndentColor
+ // Bold Blue - AttrColor
+ // Bold Magenta - StmtColor
+ // Cyan - ValueKindColor, ObjectKindColor
+ // Bold Cyan - ValueColor, DeclNameColor
+
// Decl kind names (VarDecl, FunctionDecl, etc)
static const TerminalColor DeclKindNameColor = { raw_ostream::GREEN, true };
// Attr names (CleanupAttr, GuardedByAttr, etc)
static const TerminalColor AttrColor = { raw_ostream::BLUE, true };
// Statement names (DeclStmt, ImplicitCastExpr, etc)
static const TerminalColor StmtColor = { raw_ostream::MAGENTA, true };
// Comment names (FullComment, ParagraphComment, TextComment, etc)
- static const TerminalColor CommentColor = { raw_ostream::YELLOW, true };
+ static const TerminalColor CommentColor = { raw_ostream::BLUE, false };
// Type names (int, float, etc, plus user defined types)
static const TerminalColor TypeColor = { raw_ostream::GREEN, false };
// Pointer address
static const TerminalColor AddressColor = { raw_ostream::YELLOW, false };
// Source locations
static const TerminalColor LocationColor = { raw_ostream::YELLOW, false };
// lvalue/xvalue
static const TerminalColor ValueKindColor = { raw_ostream::CYAN, false };
// bitfield/objcproperty/objcsubscript/vectorcomponent
static const TerminalColor ObjectKindColor = { raw_ostream::CYAN, false };
// Null statements
static const TerminalColor NullColor = { raw_ostream::BLUE, false };
// Undeserialized entities
static const TerminalColor UndeserializedColor = { raw_ostream::GREEN, true };
// CastKind from CastExpr's
static const TerminalColor CastColor = { raw_ostream::RED, false };
// Value of the statement
static const TerminalColor ValueColor = { raw_ostream::CYAN, true };
// Decl names
static const TerminalColor DeclNameColor = { raw_ostream::CYAN, true };
// Indents ( `, -, | )
static const TerminalColor IndentColor = { raw_ostream::BLUE, false };
class ASTDumper
: public ConstDeclVisitor<ASTDumper>, public ConstStmtVisitor<ASTDumper>,
public ConstCommentVisitor<ASTDumper> {
raw_ostream &OS;
const CommandTraits *Traits;
const SourceManager *SM;
bool IsFirstLine;
// Indicates whether more children are expected at the current tree depth
enum IndentType { IT_Child, IT_LastChild };
/// Indents[i] indicates if another child exists at level i.
/// Used by Indent() to print the tree structure.
llvm::SmallVector<IndentType, 32> Indents;
/// Indicates that more children will be needed at this indent level.
/// If true, prevents lastChild() from marking the node as the last child.
/// This is used when there are multiple collections of children to be
/// dumped as well as during conditional node dumping.
bool MoreChildren;
/// Keep track of the last location we print out so that we can
/// print out deltas from then on out.
const char *LastLocFilename;
unsigned LastLocLine;
/// The \c FullComment parent of the comment being dumped.
const FullComment *FC;
bool ShowColors;
class IndentScope {
ASTDumper &Dumper;
// Preserve the Dumper's MoreChildren value from the previous IndentScope
bool MoreChildren;
public:
IndentScope(ASTDumper &Dumper) : Dumper(Dumper) {
MoreChildren = Dumper.hasMoreChildren();
Dumper.setMoreChildren(false);
Dumper.indent();
}
~IndentScope() {
Dumper.setMoreChildren(MoreChildren);
Dumper.unindent();
}
};
class ColorScope {
ASTDumper &Dumper;
public:
ColorScope(ASTDumper &Dumper, TerminalColor Color)
: Dumper(Dumper) {
if (Dumper.ShowColors)
Dumper.OS.changeColor(Color.Color, Color.Bold);
}
~ColorScope() {
if (Dumper.ShowColors)
Dumper.OS.resetColor();
}
};
public:
ASTDumper(raw_ostream &OS, const CommandTraits *Traits,
const SourceManager *SM)
: OS(OS), Traits(Traits), SM(SM), IsFirstLine(true), MoreChildren(false),
LastLocFilename(""), LastLocLine(~0U), FC(0),
ShowColors(SM && SM->getDiagnostics().getShowColors()) { }
ASTDumper(raw_ostream &OS, const CommandTraits *Traits,
const SourceManager *SM, bool ShowColors)
: OS(OS), Traits(Traits), SM(SM), IsFirstLine(true), MoreChildren(false),
LastLocFilename(""), LastLocLine(~0U), FC(0),
ShowColors(ShowColors) { }
~ASTDumper() {
OS << "\n";
}
void dumpDecl(const Decl *D);
void dumpStmt(const Stmt *S);
void dumpFullComment(const FullComment *C);
// Formatting
void indent();
void unindent();
void lastChild();
bool hasMoreChildren();
void setMoreChildren(bool Value);
// Utilities
void dumpPointer(const void *Ptr);
void dumpSourceRange(SourceRange R);
void dumpLocation(SourceLocation Loc);
void dumpBareType(QualType T);
void dumpType(QualType T);
void dumpBareDeclRef(const Decl *Node);
void dumpDeclRef(const Decl *Node, const char *Label = 0);
void dumpName(const NamedDecl *D);
bool hasNodes(const DeclContext *DC);
void dumpDeclContext(const DeclContext *DC);
void dumpLookups(const DeclContext *DC);
void dumpAttr(const Attr *A);
// C++ Utilities
void dumpAccessSpecifier(AccessSpecifier AS);
void dumpCXXCtorInitializer(const CXXCtorInitializer *Init);
void dumpTemplateParameters(const TemplateParameterList *TPL);
void dumpTemplateArgumentListInfo(const TemplateArgumentListInfo &TALI);
void dumpTemplateArgumentLoc(const TemplateArgumentLoc &A);
void dumpTemplateArgumentList(const TemplateArgumentList &TAL);
void dumpTemplateArgument(const TemplateArgument &A,
SourceRange R = SourceRange());
// Decls
void VisitLabelDecl(const LabelDecl *D);
void VisitTypedefDecl(const TypedefDecl *D);
void VisitEnumDecl(const EnumDecl *D);
void VisitRecordDecl(const RecordDecl *D);
void VisitEnumConstantDecl(const EnumConstantDecl *D);
void VisitIndirectFieldDecl(const IndirectFieldDecl *D);
void VisitFunctionDecl(const FunctionDecl *D);
void VisitFieldDecl(const FieldDecl *D);
void VisitVarDecl(const VarDecl *D);
void VisitFileScopeAsmDecl(const FileScopeAsmDecl *D);
void VisitImportDecl(const ImportDecl *D);
// C++ Decls
void VisitNamespaceDecl(const NamespaceDecl *D);
void VisitUsingDirectiveDecl(const UsingDirectiveDecl *D);
void VisitNamespaceAliasDecl(const NamespaceAliasDecl *D);
void VisitTypeAliasDecl(const TypeAliasDecl *D);
void VisitTypeAliasTemplateDecl(const TypeAliasTemplateDecl *D);
void VisitCXXRecordDecl(const CXXRecordDecl *D);
void VisitStaticAssertDecl(const StaticAssertDecl *D);
void VisitFunctionTemplateDecl(const FunctionTemplateDecl *D);
void VisitClassTemplateDecl(const ClassTemplateDecl *D);
void VisitClassTemplateSpecializationDecl(
const ClassTemplateSpecializationDecl *D);
void VisitClassTemplatePartialSpecializationDecl(
const ClassTemplatePartialSpecializationDecl *D);
void VisitClassScopeFunctionSpecializationDecl(
const ClassScopeFunctionSpecializationDecl *D);
void VisitVarTemplateDecl(const VarTemplateDecl *D);
void VisitVarTemplateSpecializationDecl(
const VarTemplateSpecializationDecl *D);
void VisitVarTemplatePartialSpecializationDecl(
const VarTemplatePartialSpecializationDecl *D);
void VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *D);
void VisitNonTypeTemplateParmDecl(const NonTypeTemplateParmDecl *D);
void VisitTemplateTemplateParmDecl(const TemplateTemplateParmDecl *D);
void VisitUsingDecl(const UsingDecl *D);
void VisitUnresolvedUsingTypenameDecl(const UnresolvedUsingTypenameDecl *D);
void VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D);
void VisitUsingShadowDecl(const UsingShadowDecl *D);
void VisitLinkageSpecDecl(const LinkageSpecDecl *D);
void VisitAccessSpecDecl(const AccessSpecDecl *D);
void VisitFriendDecl(const FriendDecl *D);
// ObjC Decls
void VisitObjCIvarDecl(const ObjCIvarDecl *D);
void VisitObjCMethodDecl(const ObjCMethodDecl *D);
void VisitObjCCategoryDecl(const ObjCCategoryDecl *D);
void VisitObjCCategoryImplDecl(const ObjCCategoryImplDecl *D);
void VisitObjCProtocolDecl(const ObjCProtocolDecl *D);
void VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D);
void VisitObjCImplementationDecl(const ObjCImplementationDecl *D);
void VisitObjCCompatibleAliasDecl(const ObjCCompatibleAliasDecl *D);
void VisitObjCPropertyDecl(const ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D);
void VisitBlockDecl(const BlockDecl *D);
// Stmts.
void VisitStmt(const Stmt *Node);
void VisitDeclStmt(const DeclStmt *Node);
void VisitAttributedStmt(const AttributedStmt *Node);
void VisitLabelStmt(const LabelStmt *Node);
void VisitGotoStmt(const GotoStmt *Node);
void VisitCXXCatchStmt(const CXXCatchStmt *Node);
// Exprs
void VisitExpr(const Expr *Node);
void VisitCastExpr(const CastExpr *Node);
void VisitDeclRefExpr(const DeclRefExpr *Node);
void VisitPredefinedExpr(const PredefinedExpr *Node);
void VisitCharacterLiteral(const CharacterLiteral *Node);
void VisitIntegerLiteral(const IntegerLiteral *Node);
void VisitFloatingLiteral(const FloatingLiteral *Node);
void VisitStringLiteral(const StringLiteral *Str);
void VisitUnaryOperator(const UnaryOperator *Node);
void VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *Node);
void VisitMemberExpr(const MemberExpr *Node);
void VisitExtVectorElementExpr(const ExtVectorElementExpr *Node);
void VisitBinaryOperator(const BinaryOperator *Node);
void VisitCompoundAssignOperator(const CompoundAssignOperator *Node);
void VisitAddrLabelExpr(const AddrLabelExpr *Node);
void VisitBlockExpr(const BlockExpr *Node);
void VisitOpaqueValueExpr(const OpaqueValueExpr *Node);
// C++
void VisitCXXNamedCastExpr(const CXXNamedCastExpr *Node);
void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *Node);
void VisitCXXThisExpr(const CXXThisExpr *Node);
void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *Node);
void VisitCXXConstructExpr(const CXXConstructExpr *Node);
void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Node);
void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Node);
void VisitExprWithCleanups(const ExprWithCleanups *Node);
void VisitUnresolvedLookupExpr(const UnresolvedLookupExpr *Node);
void dumpCXXTemporary(const CXXTemporary *Temporary);
void VisitLambdaExpr(const LambdaExpr *Node) {
VisitExpr(Node);
dumpDecl(Node->getLambdaClass());
}
// ObjC
void VisitObjCAtCatchStmt(const ObjCAtCatchStmt *Node);
void VisitObjCEncodeExpr(const ObjCEncodeExpr *Node);
void VisitObjCMessageExpr(const ObjCMessageExpr *Node);
void VisitObjCBoxedExpr(const ObjCBoxedExpr *Node);
void VisitObjCSelectorExpr(const ObjCSelectorExpr *Node);
void VisitObjCProtocolExpr(const ObjCProtocolExpr *Node);
void VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *Node);
void VisitObjCSubscriptRefExpr(const ObjCSubscriptRefExpr *Node);
void VisitObjCIvarRefExpr(const ObjCIvarRefExpr *Node);
void VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *Node);
// Comments.
const char *getCommandName(unsigned CommandID);
void dumpComment(const Comment *C);
// Inline comments.
void visitTextComment(const TextComment *C);
void visitInlineCommandComment(const InlineCommandComment *C);
void visitHTMLStartTagComment(const HTMLStartTagComment *C);
void visitHTMLEndTagComment(const HTMLEndTagComment *C);
// Block comments.
void visitBlockCommandComment(const BlockCommandComment *C);
void visitParamCommandComment(const ParamCommandComment *C);
void visitTParamCommandComment(const TParamCommandComment *C);
void visitVerbatimBlockComment(const VerbatimBlockComment *C);
void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C);
void visitVerbatimLineComment(const VerbatimLineComment *C);
};
}
//===----------------------------------------------------------------------===//
// Utilities
//===----------------------------------------------------------------------===//
// Print out the appropriate tree structure using the Indents vector.
// Example of tree and the Indents vector at each level.
// A        { }
// |-B      { IT_Child }
// | `-C    { IT_Child, IT_LastChild }
// `-D      { IT_LastChild }
//   |-E    { IT_LastChild, IT_Child }
//   `-F    { IT_LastChild, IT_LastChild }
//
// Type           non-last element, last element
// IT_Child       "| "              "|-"
// IT_LastChild   "  "              "`-"
void ASTDumper::indent() {
if (IsFirstLine)
IsFirstLine = false;
else
OS << "\n";
ColorScope Color(*this, IndentColor);
for (SmallVectorImpl<IndentType>::const_iterator I = Indents.begin(),
E = Indents.end();
I != E; ++I) {
switch (*I) {
case IT_Child:
if (I == E - 1)
OS << "|-";
else
OS << "| ";
continue;
case IT_LastChild:
if (I == E - 1)
OS << "`-";
else
OS << " ";
continue;
}
llvm_unreachable("Invalid IndentType");
}
Indents.push_back(IT_Child);
}
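// A standalone rendering of the prefix logic above (toy code): only the
// innermost level draws a connector ("|-" or "`-"); outer levels draw a
// continuation bar ("| ") or blanks ("  ").
#include <string>
#include <vector>
static std::string toyRenderPrefix(const std::vector<bool> &IsLastChild) {
  std::string Out;
  for (size_t i = 0, e = IsLastChild.size(); i != e; ++i) {
    bool Innermost = (i == e - 1);
    if (IsLastChild[i])
      Out += Innermost ? "`-" : "  ";
    else
      Out += Innermost ? "|-" : "| ";
  }
  return Out; // { IT_LastChild, IT_Child } renders as "  |-" (node E above)
}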
void ASTDumper::unindent() {
Indents.pop_back();
}
// Call this before dumping each node that may be the last child. If
// MoreChildren is false, the node is marked as the last child; otherwise it
// is treated as a regular node.
void ASTDumper::lastChild() {
if (!hasMoreChildren())
Indents.back() = IT_LastChild;
}
// Set MoreChildren before calling any function that may print additional
// nodes, so that two nodes cannot both be marked as the final child.
bool ASTDumper::hasMoreChildren() {
return MoreChildren;
}
void ASTDumper::setMoreChildren(bool Value) {
MoreChildren = Value;
}
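// A usage sketch of this protocol (hypothetical calls; the real pattern
// appears in dumpDecl() below): a method printing two collections of
// children keeps MoreChildren raised while dumping the first, so that
// lastChild() cannot prematurely mark one of its nodes as final:
//
//   setMoreChildren(true);   // attributes still follow
//   dumpStmt(Body);          // lastChild() inside here is a no-op
//   setMoreChildren(false);
//   lastChild();             // the next node really is the last one
//   dumpAttr(OnlyAttr);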
void ASTDumper::dumpPointer(const void *Ptr) {
ColorScope Color(*this, AddressColor);
OS << ' ' << Ptr;
}
void ASTDumper::dumpLocation(SourceLocation Loc) {
ColorScope Color(*this, LocationColor);
SourceLocation SpellingLoc = SM->getSpellingLoc(Loc);
// The general format we print out is filename:line:col, but we drop pieces
// that haven't changed since the last loc printed.
PresumedLoc PLoc = SM->getPresumedLoc(SpellingLoc);
if (PLoc.isInvalid()) {
OS << "<invalid sloc>";
return;
}
if (strcmp(PLoc.getFilename(), LastLocFilename) != 0) {
OS << PLoc.getFilename() << ':' << PLoc.getLine()
<< ':' << PLoc.getColumn();
LastLocFilename = PLoc.getFilename();
LastLocLine = PLoc.getLine();
} else if (PLoc.getLine() != LastLocLine) {
OS << "line" << ':' << PLoc.getLine()
<< ':' << PLoc.getColumn();
LastLocLine = PLoc.getLine();
} else {
OS << "col" << ':' << PLoc.getColumn();
}
}
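// A toy analogue of the delta printing above (names are hypothetical):
// only the location components that changed since the last print are
// emitted.
#include <cstdio>
#include <cstring>
static void toyPrintDeltaLoc(const char *&LastFile, unsigned &LastLine,
                             const char *File, unsigned Line, unsigned Col) {
  if (std::strcmp(File, LastFile) != 0) {
    std::printf("%s:%u:%u", File, Line, Col); // new file: print everything
    LastFile = File;
    LastLine = Line;
  } else if (Line != LastLine) {
    std::printf("line:%u:%u", Line, Col);     // same file, new line
    LastLine = Line;
  } else {
    std::printf("col:%u", Col);               // same file and line
  }
}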
void ASTDumper::dumpSourceRange(SourceRange R) {
// Can't translate locations if a SourceManager isn't available.
if (!SM)
return;
OS << " <";
dumpLocation(R.getBegin());
if (R.getBegin() != R.getEnd()) {
OS << ", ";
dumpLocation(R.getEnd());
}
OS << ">";
// <t2.c:123:421[blah], t2.c:412:321>
}
void ASTDumper::dumpBareType(QualType T) {
ColorScope Color(*this, TypeColor);
SplitQualType T_split = T.split();
OS << "'" << QualType::getAsString(T_split) << "'";
if (!T.isNull()) {
// If the type is sugared, also dump a (shallow) desugared type.
SplitQualType D_split = T.getSplitDesugaredType();
if (T_split != D_split)
OS << ":'" << QualType::getAsString(D_split) << "'";
}
}
void ASTDumper::dumpType(QualType T) {
OS << ' ';
dumpBareType(T);
}
void ASTDumper::dumpBareDeclRef(const Decl *D) {
{
ColorScope Color(*this, DeclKindNameColor);
OS << D->getDeclKindName();
}
dumpPointer(D);
if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
ColorScope Color(*this, DeclNameColor);
OS << " '" << ND->getDeclName() << '\'';
}
if (const ValueDecl *VD = dyn_cast<ValueDecl>(D))
dumpType(VD->getType());
}
void ASTDumper::dumpDeclRef(const Decl *D, const char *Label) {
if (!D)
return;
IndentScope Indent(*this);
if (Label)
OS << Label << ' ';
dumpBareDeclRef(D);
}
void ASTDumper::dumpName(const NamedDecl *ND) {
if (ND->getDeclName()) {
ColorScope Color(*this, DeclNameColor);
OS << ' ' << ND->getNameAsString();
}
}
bool ASTDumper::hasNodes(const DeclContext *DC) {
if (!DC)
return false;
return DC->hasExternalLexicalStorage() ||
DC->noload_decls_begin() != DC->noload_decls_end();
}
void ASTDumper::dumpDeclContext(const DeclContext *DC) {
if (!DC)
return;
bool HasUndeserializedDecls = DC->hasExternalLexicalStorage();
for (DeclContext::decl_iterator I = DC->noload_decls_begin(),
E = DC->noload_decls_end();
I != E; ++I) {
DeclContext::decl_iterator Next = I;
++Next;
if (Next == E && !HasUndeserializedDecls)
lastChild();
dumpDecl(*I);
}
if (HasUndeserializedDecls) {
lastChild();
IndentScope Indent(*this);
ColorScope Color(*this, UndeserializedColor);
OS << "<undeserialized declarations>";
}
}
void ASTDumper::dumpLookups(const DeclContext *DC) {
IndentScope Indent(*this);
OS << "StoredDeclsMap ";
dumpBareDeclRef(cast<Decl>(DC));
const DeclContext *Primary = DC->getPrimaryContext();
if (Primary != DC) {
OS << " primary";
dumpPointer(cast<Decl>(Primary));
}
bool HasUndeserializedLookups = Primary->hasExternalVisibleStorage();
DeclContext::all_lookups_iterator I = Primary->noload_lookups_begin(),
E = Primary->noload_lookups_end();
while (I != E) {
DeclarationName Name = I.getLookupName();
DeclContextLookupResult R = *I++;
if (I == E && !HasUndeserializedLookups)
lastChild();
IndentScope Indent(*this);
OS << "DeclarationName ";
{
ColorScope Color(*this, DeclNameColor);
OS << '\'' << Name << '\'';
}
for (DeclContextLookupResult::iterator RI = R.begin(), RE = R.end();
RI != RE; ++RI) {
if (RI + 1 == RE)
lastChild();
dumpDeclRef(*RI);
if ((*RI)->isHidden())
OS << " hidden";
}
}
if (HasUndeserializedLookups) {
lastChild();
IndentScope Indent(*this);
ColorScope Color(*this, UndeserializedColor);
OS << "<undeserialized lookups>";
}
}
void ASTDumper::dumpAttr(const Attr *A) {
IndentScope Indent(*this);
{
ColorScope Color(*this, AttrColor);
switch (A->getKind()) {
#define ATTR(X) case attr::X: OS << #X; break;
#include "clang/Basic/AttrList.inc"
default: llvm_unreachable("unexpected attribute kind");
}
OS << "Attr";
}
dumpPointer(A);
dumpSourceRange(A->getRange());
#include "clang/AST/AttrDump.inc"
}
static void dumpPreviousDeclImpl(raw_ostream &OS, ...) {}
template<typename T>
static void dumpPreviousDeclImpl(raw_ostream &OS, const Mergeable<T> *D) {
const T *First = D->getFirstDecl();
if (First != D)
OS << " first " << First;
}
template<typename T>
static void dumpPreviousDeclImpl(raw_ostream &OS, const Redeclarable<T> *D) {
const T *Prev = D->getPreviousDecl();
if (Prev)
OS << " prev " << Prev;
}
/// Dump the previous declaration in the redeclaration chain for a declaration,
/// if any.
static void dumpPreviousDecl(raw_ostream &OS, const Decl *D) {
switch (D->getKind()) {
#define DECL(DERIVED, BASE) \
case Decl::DERIVED: \
return dumpPreviousDeclImpl(OS, cast<DERIVED##Decl>(D));
#define ABSTRACT_DECL(DECL)
#include "clang/AST/DeclNodes.inc"
}
llvm_unreachable("Decl that isn't part of DeclNodes.inc!");
}
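// The "..." overload above is the deliberate catch-all: overload resolution
// ranks an ellipsis match below every other viable candidate, so the
// Mergeable<T>/Redeclarable<T> overloads win whenever the concrete Decl
// type derives from one of them. The same idiom in isolation (toy types):
struct ToyRedeclarable {};
struct ToyDecl : ToyRedeclarable {};
struct ToyOther {};
static int toyClassify(...) { return 0; }                     // fallback
static int toyClassify(const ToyRedeclarable *) { return 1; } // preferred
// toyClassify((ToyDecl *)0) == 1; toyClassify((ToyOther *)0) == 0.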
//===----------------------------------------------------------------------===//
// C++ Utilities
//===----------------------------------------------------------------------===//
void ASTDumper::dumpAccessSpecifier(AccessSpecifier AS) {
switch (AS) {
case AS_none:
break;
case AS_public:
OS << "public";
break;
case AS_protected:
OS << "protected";
break;
case AS_private:
OS << "private";
break;
}
}
void ASTDumper::dumpCXXCtorInitializer(const CXXCtorInitializer *Init) {
IndentScope Indent(*this);
OS << "CXXCtorInitializer";
if (Init->isAnyMemberInitializer()) {
OS << ' ';
dumpBareDeclRef(Init->getAnyMember());
} else {
dumpType(QualType(Init->getBaseClass(), 0));
}
dumpStmt(Init->getInit());
}
void ASTDumper::dumpTemplateParameters(const TemplateParameterList *TPL) {
if (!TPL)
return;
for (TemplateParameterList::const_iterator I = TPL->begin(), E = TPL->end();
I != E; ++I)
dumpDecl(*I);
}
void ASTDumper::dumpTemplateArgumentListInfo(
const TemplateArgumentListInfo &TALI) {
for (unsigned i = 0, e = TALI.size(); i < e; ++i) {
if (i + 1 == e)
lastChild();
dumpTemplateArgumentLoc(TALI[i]);
}
}
void ASTDumper::dumpTemplateArgumentLoc(const TemplateArgumentLoc &A) {
dumpTemplateArgument(A.getArgument(), A.getSourceRange());
}
void ASTDumper::dumpTemplateArgumentList(const TemplateArgumentList &TAL) {
for (unsigned i = 0, e = TAL.size(); i < e; ++i)
dumpTemplateArgument(TAL[i]);
}
void ASTDumper::dumpTemplateArgument(const TemplateArgument &A, SourceRange R) {
IndentScope Indent(*this);
OS << "TemplateArgument";
if (R.isValid())
dumpSourceRange(R);
switch (A.getKind()) {
case TemplateArgument::Null:
OS << " null";
break;
case TemplateArgument::Type:
OS << " type";
lastChild();
dumpType(A.getAsType());
break;
case TemplateArgument::Declaration:
OS << " decl";
lastChild();
dumpDeclRef(A.getAsDecl());
break;
case TemplateArgument::NullPtr:
OS << " nullptr";
break;
case TemplateArgument::Integral:
OS << " integral " << A.getAsIntegral();
break;
case TemplateArgument::Template:
OS << " template ";
A.getAsTemplate().dump(OS);
break;
case TemplateArgument::TemplateExpansion:
OS << " template expansion";
A.getAsTemplateOrTemplatePattern().dump(OS);
break;
case TemplateArgument::Expression:
OS << " expr";
lastChild();
dumpStmt(A.getAsExpr());
break;
case TemplateArgument::Pack:
OS << " pack";
for (TemplateArgument::pack_iterator I = A.pack_begin(), E = A.pack_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpTemplateArgument(*I);
}
break;
}
}
//===----------------------------------------------------------------------===//
// Decl dumping methods.
//===----------------------------------------------------------------------===//
void ASTDumper::dumpDecl(const Decl *D) {
IndentScope Indent(*this);
if (!D) {
ColorScope Color(*this, NullColor);
OS << "<<<NULL>>>";
return;
}
{
ColorScope Color(*this, DeclKindNameColor);
OS << D->getDeclKindName() << "Decl";
}
dumpPointer(D);
if (D->getLexicalDeclContext() != D->getDeclContext())
OS << " parent " << cast<Decl>(D->getDeclContext());
dumpPreviousDecl(OS, D);
dumpSourceRange(D->getSourceRange());
if (Module *M = D->getOwningModule())
OS << " in " << M->getFullModuleName();
if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
if (ND->isHidden())
OS << " hidden";
bool HasAttrs = D->attr_begin() != D->attr_end();
const FullComment *Comment =
D->getASTContext().getLocalCommentForDeclUncached(D);
// Decls within functions are visited by the body
bool HasDeclContext = !isa<FunctionDecl>(*D) && !isa<ObjCMethodDecl>(*D) &&
hasNodes(dyn_cast<DeclContext>(D));
setMoreChildren(HasAttrs || Comment || HasDeclContext);
ConstDeclVisitor<ASTDumper>::Visit(D);
setMoreChildren(Comment || HasDeclContext);
for (Decl::attr_iterator I = D->attr_begin(), E = D->attr_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpAttr(*I);
}
setMoreChildren(HasDeclContext);
lastChild();
dumpFullComment(Comment);
if (D->isInvalidDecl())
OS << " invalid";
setMoreChildren(false);
if (HasDeclContext)
dumpDeclContext(cast<DeclContext>(D));
}
void ASTDumper::VisitLabelDecl(const LabelDecl *D) {
dumpName(D);
}
void ASTDumper::VisitTypedefDecl(const TypedefDecl *D) {
dumpName(D);
dumpType(D->getUnderlyingType());
if (D->isModulePrivate())
OS << " __module_private__";
}
void ASTDumper::VisitEnumDecl(const EnumDecl *D) {
if (D->isScoped()) {
if (D->isScopedUsingClassTag())
OS << " class";
else
OS << " struct";
}
dumpName(D);
if (D->isModulePrivate())
OS << " __module_private__";
if (D->isFixed())
dumpType(D->getIntegerType());
}
void ASTDumper::VisitRecordDecl(const RecordDecl *D) {
OS << ' ' << D->getKindName();
dumpName(D);
if (D->isModulePrivate())
OS << " __module_private__";
if (D->isCompleteDefinition())
OS << " definition";
}
void ASTDumper::VisitEnumConstantDecl(const EnumConstantDecl *D) {
dumpName(D);
dumpType(D->getType());
if (const Expr *Init = D->getInitExpr()) {
lastChild();
dumpStmt(Init);
}
}
void ASTDumper::VisitIndirectFieldDecl(const IndirectFieldDecl *D) {
dumpName(D);
dumpType(D->getType());
for (IndirectFieldDecl::chain_iterator I = D->chain_begin(),
E = D->chain_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDeclRef(*I);
}
}
void ASTDumper::VisitFunctionDecl(const FunctionDecl *D) {
dumpName(D);
dumpType(D->getType());
StorageClass SC = D->getStorageClass();
if (SC != SC_None)
OS << ' ' << VarDecl::getStorageClassSpecifierString(SC);
if (D->isInlineSpecified())
OS << " inline";
if (D->isVirtualAsWritten())
OS << " virtual";
if (D->isModulePrivate())
OS << " __module_private__";
if (D->isPure())
OS << " pure";
else if (D->isDeletedAsWritten())
OS << " delete";
if (const FunctionProtoType *FPT = D->getType()->getAs<FunctionProtoType>()) {
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
switch (EPI.ExceptionSpecType) {
default: break;
case EST_Unevaluated:
OS << " noexcept-unevaluated " << EPI.ExceptionSpecDecl;
break;
case EST_Uninstantiated:
OS << " noexcept-uninstantiated " << EPI.ExceptionSpecTemplate;
break;
}
}
bool OldMoreChildren = hasMoreChildren();
const FunctionTemplateSpecializationInfo *FTSI =
D->getTemplateSpecializationInfo();
bool HasTemplateSpecialization = FTSI;
bool HasNamedDecls = D->getDeclsInPrototypeScope().begin() !=
D->getDeclsInPrototypeScope().end();
bool HasFunctionDecls = D->param_begin() != D->param_end();
const CXXConstructorDecl *C = dyn_cast<CXXConstructorDecl>(D);
bool HasCtorInitializers = C && C->init_begin() != C->init_end();
bool HasDeclarationBody = D->doesThisDeclarationHaveABody();
setMoreChildren(OldMoreChildren || HasNamedDecls || HasFunctionDecls ||
HasCtorInitializers || HasDeclarationBody);
if (HasTemplateSpecialization) {
lastChild();
dumpTemplateArgumentList(*FTSI->TemplateArguments);
}
setMoreChildren(OldMoreChildren || HasFunctionDecls ||
HasCtorInitializers || HasDeclarationBody);
for (ArrayRef<NamedDecl *>::iterator
I = D->getDeclsInPrototypeScope().begin(),
E = D->getDeclsInPrototypeScope().end(); I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDecl(*I);
}
setMoreChildren(OldMoreChildren || HasCtorInitializers || HasDeclarationBody);
for (FunctionDecl::param_const_iterator I = D->param_begin(),
E = D->param_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDecl(*I);
}
setMoreChildren(OldMoreChildren || HasDeclarationBody);
if (HasCtorInitializers)
for (CXXConstructorDecl::init_const_iterator I = C->init_begin(),
E = C->init_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpCXXCtorInitializer(*I);
}
setMoreChildren(OldMoreChildren);
if (HasDeclarationBody) {
lastChild();
dumpStmt(D->getBody());
}
}
void ASTDumper::VisitFieldDecl(const FieldDecl *D) {
dumpName(D);
dumpType(D->getType());
if (D->isMutable())
OS << " mutable";
if (D->isModulePrivate())
OS << " __module_private__";
bool OldMoreChildren = hasMoreChildren();
bool IsBitField = D->isBitField();
Expr *Init = D->getInClassInitializer();
bool HasInit = Init;
setMoreChildren(OldMoreChildren || HasInit);
if (IsBitField) {
lastChild();
dumpStmt(D->getBitWidth());
}
setMoreChildren(OldMoreChildren);
if (HasInit) {
lastChild();
dumpStmt(Init);
}
}
void ASTDumper::VisitVarDecl(const VarDecl *D) {
dumpName(D);
dumpType(D->getType());
StorageClass SC = D->getStorageClass();
if (SC != SC_None)
OS << ' ' << VarDecl::getStorageClassSpecifierString(SC);
switch (D->getTLSKind()) {
case VarDecl::TLS_None: break;
case VarDecl::TLS_Static: OS << " tls"; break;
case VarDecl::TLS_Dynamic: OS << " tls_dynamic"; break;
}
if (D->isModulePrivate())
OS << " __module_private__";
if (D->isNRVOVariable())
OS << " nrvo";
if (D->hasInit()) {
lastChild();
dumpStmt(D->getInit());
}
}
void ASTDumper::VisitFileScopeAsmDecl(const FileScopeAsmDecl *D) {
lastChild();
dumpStmt(D->getAsmString());
}
void ASTDumper::VisitImportDecl(const ImportDecl *D) {
OS << ' ' << D->getImportedModule()->getFullModuleName();
}
//===----------------------------------------------------------------------===//
// C++ Declarations
//===----------------------------------------------------------------------===//
void ASTDumper::VisitNamespaceDecl(const NamespaceDecl *D) {
dumpName(D);
if (D->isInline())
OS << " inline";
if (!D->isOriginalNamespace())
dumpDeclRef(D->getOriginalNamespace(), "original");
}
void ASTDumper::VisitUsingDirectiveDecl(const UsingDirectiveDecl *D) {
OS << ' ';
dumpBareDeclRef(D->getNominatedNamespace());
}
void ASTDumper::VisitNamespaceAliasDecl(const NamespaceAliasDecl *D) {
dumpName(D);
dumpDeclRef(D->getAliasedNamespace());
}
void ASTDumper::VisitTypeAliasDecl(const TypeAliasDecl *D) {
dumpName(D);
dumpType(D->getUnderlyingType());
}
void ASTDumper::VisitTypeAliasTemplateDecl(const TypeAliasTemplateDecl *D) {
dumpName(D);
dumpTemplateParameters(D->getTemplateParameters());
dumpDecl(D->getTemplatedDecl());
}
void ASTDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) {
VisitRecordDecl(D);
if (!D->isCompleteDefinition())
return;
for (CXXRecordDecl::base_class_const_iterator I = D->bases_begin(),
E = D->bases_end();
I != E; ++I) {
IndentScope Indent(*this);
if (I->isVirtual())
OS << "virtual ";
dumpAccessSpecifier(I->getAccessSpecifier());
dumpType(I->getType());
if (I->isPackExpansion())
OS << "...";
}
}
void ASTDumper::VisitStaticAssertDecl(const StaticAssertDecl *D) {
dumpStmt(D->getAssertExpr());
lastChild();
dumpStmt(D->getMessage());
}
void ASTDumper::VisitFunctionTemplateDecl(const FunctionTemplateDecl *D) {
dumpName(D);
dumpTemplateParameters(D->getTemplateParameters());
dumpDecl(D->getTemplatedDecl());
for (FunctionTemplateDecl::spec_iterator I = D->spec_begin(),
E = D->spec_end();
I != E; ++I) {
FunctionTemplateDecl::spec_iterator Next = I;
++Next;
if (Next == E)
lastChild();
switch (I->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ImplicitInstantiation:
case TSK_ExplicitInstantiationDeclaration:
case TSK_ExplicitInstantiationDefinition:
if (D == D->getCanonicalDecl())
dumpDecl(*I);
else
dumpDeclRef(*I);
break;
case TSK_ExplicitSpecialization:
dumpDeclRef(*I);
break;
}
}
}
void ASTDumper::VisitClassTemplateDecl(const ClassTemplateDecl *D) {
dumpName(D);
dumpTemplateParameters(D->getTemplateParameters());
ClassTemplateDecl::spec_iterator I = D->spec_begin();
ClassTemplateDecl::spec_iterator E = D->spec_end();
if (I == E)
lastChild();
dumpDecl(D->getTemplatedDecl());
for (; I != E; ++I) {
ClassTemplateDecl::spec_iterator Next = I;
++Next;
if (Next == E)
lastChild();
switch (I->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ImplicitInstantiation:
if (D == D->getCanonicalDecl())
dumpDecl(*I);
else
dumpDeclRef(*I);
break;
case TSK_ExplicitSpecialization:
case TSK_ExplicitInstantiationDeclaration:
case TSK_ExplicitInstantiationDefinition:
dumpDeclRef(*I);
break;
}
}
}
void ASTDumper::VisitClassTemplateSpecializationDecl(
const ClassTemplateSpecializationDecl *D) {
VisitCXXRecordDecl(D);
dumpTemplateArgumentList(D->getTemplateArgs());
}
void ASTDumper::VisitClassTemplatePartialSpecializationDecl(
const ClassTemplatePartialSpecializationDecl *D) {
VisitClassTemplateSpecializationDecl(D);
dumpTemplateParameters(D->getTemplateParameters());
}
void ASTDumper::VisitClassScopeFunctionSpecializationDecl(
const ClassScopeFunctionSpecializationDecl *D) {
dumpDeclRef(D->getSpecialization());
if (D->hasExplicitTemplateArgs())
dumpTemplateArgumentListInfo(D->templateArgs());
}
void ASTDumper::VisitVarTemplateDecl(const VarTemplateDecl *D) {
dumpName(D);
dumpTemplateParameters(D->getTemplateParameters());
VarTemplateDecl::spec_iterator I = D->spec_begin();
VarTemplateDecl::spec_iterator E = D->spec_end();
if (I == E)
lastChild();
dumpDecl(D->getTemplatedDecl());
for (; I != E; ++I) {
VarTemplateDecl::spec_iterator Next = I;
++Next;
if (Next == E)
lastChild();
switch (I->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ImplicitInstantiation:
if (D == D->getCanonicalDecl())
dumpDecl(*I);
else
dumpDeclRef(*I);
break;
case TSK_ExplicitSpecialization:
case TSK_ExplicitInstantiationDeclaration:
case TSK_ExplicitInstantiationDefinition:
dumpDeclRef(*I);
break;
}
}
}
void ASTDumper::VisitVarTemplateSpecializationDecl(
const VarTemplateSpecializationDecl *D) {
dumpTemplateArgumentList(D->getTemplateArgs());
VisitVarDecl(D);
}
void ASTDumper::VisitVarTemplatePartialSpecializationDecl(
const VarTemplatePartialSpecializationDecl *D) {
dumpTemplateParameters(D->getTemplateParameters());
VisitVarTemplateSpecializationDecl(D);
}
void ASTDumper::VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *D) {
if (D->wasDeclaredWithTypename())
OS << " typename";
else
OS << " class";
if (D->isParameterPack())
OS << " ...";
dumpName(D);
if (D->hasDefaultArgument())
dumpType(D->getDefaultArgument());
}
void ASTDumper::VisitNonTypeTemplateParmDecl(const NonTypeTemplateParmDecl *D) {
dumpType(D->getType());
if (D->isParameterPack())
OS << " ...";
dumpName(D);
if (D->hasDefaultArgument())
dumpStmt(D->getDefaultArgument());
}
void ASTDumper::VisitTemplateTemplateParmDecl(
const TemplateTemplateParmDecl *D) {
if (D->isParameterPack())
OS << " ...";
dumpName(D);
dumpTemplateParameters(D->getTemplateParameters());
if (D->hasDefaultArgument())
dumpTemplateArgumentLoc(D->getDefaultArgument());
}
void ASTDumper::VisitUsingDecl(const UsingDecl *D) {
OS << ' ';
D->getQualifier()->print(OS, D->getASTContext().getPrintingPolicy());
OS << D->getNameAsString();
}
void ASTDumper::VisitUnresolvedUsingTypenameDecl(
const UnresolvedUsingTypenameDecl *D) {
OS << ' ';
D->getQualifier()->print(OS, D->getASTContext().getPrintingPolicy());
OS << D->getNameAsString();
}
void ASTDumper::VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D) {
OS << ' ';
D->getQualifier()->print(OS, D->getASTContext().getPrintingPolicy());
OS << D->getNameAsString();
dumpType(D->getType());
}
void ASTDumper::VisitUsingShadowDecl(const UsingShadowDecl *D) {
OS << ' ';
dumpBareDeclRef(D->getTargetDecl());
}
void ASTDumper::VisitLinkageSpecDecl(const LinkageSpecDecl *D) {
switch (D->getLanguage()) {
case LinkageSpecDecl::lang_c: OS << " C"; break;
case LinkageSpecDecl::lang_cxx: OS << " C++"; break;
}
}
void ASTDumper::VisitAccessSpecDecl(const AccessSpecDecl *D) {
OS << ' ';
dumpAccessSpecifier(D->getAccess());
}
void ASTDumper::VisitFriendDecl(const FriendDecl *D) {
lastChild();
if (TypeSourceInfo *T = D->getFriendType())
dumpType(T->getType());
else
dumpDecl(D->getFriendDecl());
}
//===----------------------------------------------------------------------===//
// Obj-C Declarations
//===----------------------------------------------------------------------===//
void ASTDumper::VisitObjCIvarDecl(const ObjCIvarDecl *D) {
dumpName(D);
dumpType(D->getType());
if (D->getSynthesize())
OS << " synthesize";
if (D->getBackingIvarReferencedInAccessor())
OS << " BackingIvarReferencedInAccessor";
switch (D->getAccessControl()) {
case ObjCIvarDecl::None:
OS << " none";
break;
case ObjCIvarDecl::Private:
OS << " private";
break;
case ObjCIvarDecl::Protected:
OS << " protected";
break;
case ObjCIvarDecl::Public:
OS << " public";
break;
case ObjCIvarDecl::Package:
OS << " package";
break;
}
}
void ASTDumper::VisitObjCMethodDecl(const ObjCMethodDecl *D) {
if (D->isInstanceMethod())
OS << " -";
else
OS << " +";
dumpName(D);
dumpType(D->getResultType());
bool OldMoreChildren = hasMoreChildren();
bool IsVariadic = D->isVariadic();
bool HasBody = D->hasBody();
setMoreChildren(OldMoreChildren || IsVariadic || HasBody);
if (D->isThisDeclarationADefinition()) {
lastChild();
dumpDeclContext(D);
} else {
for (ObjCMethodDecl::param_const_iterator I = D->param_begin(),
E = D->param_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDecl(*I);
}
}
setMoreChildren(OldMoreChildren || HasBody);
if (IsVariadic) {
lastChild();
IndentScope Indent(*this);
OS << "...";
}
setMoreChildren(OldMoreChildren);
if (HasBody) {
lastChild();
dumpStmt(D->getBody());
}
}
void ASTDumper::VisitObjCCategoryDecl(const ObjCCategoryDecl *D) {
dumpName(D);
dumpDeclRef(D->getClassInterface());
if (D->protocol_begin() == D->protocol_end())
lastChild();
dumpDeclRef(D->getImplementation());
for (ObjCCategoryDecl::protocol_iterator I = D->protocol_begin(),
E = D->protocol_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDeclRef(*I);
}
}
void ASTDumper::VisitObjCCategoryImplDecl(const ObjCCategoryImplDecl *D) {
dumpName(D);
dumpDeclRef(D->getClassInterface());
lastChild();
dumpDeclRef(D->getCategoryDecl());
}
void ASTDumper::VisitObjCProtocolDecl(const ObjCProtocolDecl *D) {
dumpName(D);
for (ObjCProtocolDecl::protocol_iterator I = D->protocol_begin(),
E = D->protocol_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDeclRef(*I);
}
}
void ASTDumper::VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D) {
dumpName(D);
dumpDeclRef(D->getSuperClass(), "super");
if (D->protocol_begin() == D->protocol_end())
lastChild();
dumpDeclRef(D->getImplementation());
for (ObjCInterfaceDecl::protocol_iterator I = D->protocol_begin(),
E = D->protocol_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDeclRef(*I);
}
}
void ASTDumper::VisitObjCImplementationDecl(const ObjCImplementationDecl *D) {
dumpName(D);
dumpDeclRef(D->getSuperClass(), "super");
if (D->init_begin() == D->init_end())
lastChild();
dumpDeclRef(D->getClassInterface());
for (ObjCImplementationDecl::init_const_iterator I = D->init_begin(),
E = D->init_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpCXXCtorInitializer(*I);
}
}
void ASTDumper::VisitObjCCompatibleAliasDecl(const ObjCCompatibleAliasDecl *D) {
dumpName(D);
lastChild();
dumpDeclRef(D->getClassInterface());
}
void ASTDumper::VisitObjCPropertyDecl(const ObjCPropertyDecl *D) {
dumpName(D);
dumpType(D->getType());
if (D->getPropertyImplementation() == ObjCPropertyDecl::Required)
OS << " required";
else if (D->getPropertyImplementation() == ObjCPropertyDecl::Optional)
OS << " optional";
ObjCPropertyDecl::PropertyAttributeKind Attrs = D->getPropertyAttributes();
if (Attrs != ObjCPropertyDecl::OBJC_PR_noattr) {
if (Attrs & ObjCPropertyDecl::OBJC_PR_readonly)
OS << " readonly";
if (Attrs & ObjCPropertyDecl::OBJC_PR_assign)
OS << " assign";
if (Attrs & ObjCPropertyDecl::OBJC_PR_readwrite)
OS << " readwrite";
if (Attrs & ObjCPropertyDecl::OBJC_PR_retain)
OS << " retain";
if (Attrs & ObjCPropertyDecl::OBJC_PR_copy)
OS << " copy";
if (Attrs & ObjCPropertyDecl::OBJC_PR_nonatomic)
OS << " nonatomic";
if (Attrs & ObjCPropertyDecl::OBJC_PR_atomic)
OS << " atomic";
if (Attrs & ObjCPropertyDecl::OBJC_PR_weak)
OS << " weak";
if (Attrs & ObjCPropertyDecl::OBJC_PR_strong)
OS << " strong";
if (Attrs & ObjCPropertyDecl::OBJC_PR_unsafe_unretained)
OS << " unsafe_unretained";
if (Attrs & ObjCPropertyDecl::OBJC_PR_getter) {
if (!(Attrs & ObjCPropertyDecl::OBJC_PR_setter))
lastChild();
dumpDeclRef(D->getGetterMethodDecl(), "getter");
}
if (Attrs & ObjCPropertyDecl::OBJC_PR_setter) {
lastChild();
dumpDeclRef(D->getSetterMethodDecl(), "setter");
}
}
}
void ASTDumper::VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D) {
dumpName(D->getPropertyDecl());
if (D->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize)
OS << " synthesize";
else
OS << " dynamic";
dumpDeclRef(D->getPropertyDecl());
lastChild();
dumpDeclRef(D->getPropertyIvarDecl());
}
void ASTDumper::VisitBlockDecl(const BlockDecl *D) {
for (BlockDecl::param_const_iterator I = D->param_begin(), E = D->param_end();
I != E; ++I)
dumpDecl(*I);
if (D->isVariadic()) {
IndentScope Indent(*this);
OS << "...";
}
if (D->capturesCXXThis()) {
IndentScope Indent(*this);
OS << "capture this";
}
for (BlockDecl::capture_iterator I = D->capture_begin(), E = D->capture_end();
I != E; ++I) {
IndentScope Indent(*this);
OS << "capture";
if (I->isByRef())
OS << " byref";
if (I->isNested())
OS << " nested";
if (I->getVariable()) {
OS << ' ';
dumpBareDeclRef(I->getVariable());
}
if (I->hasCopyExpr())
dumpStmt(I->getCopyExpr());
}
lastChild();
dumpStmt(D->getBody());
}
//===----------------------------------------------------------------------===//
// Stmt dumping methods.
//===----------------------------------------------------------------------===//
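// dumpStmt is the single entry point for the statement tree: it special-cases
// DeclStmt (whose children are declarations, not statements) and otherwise
// lets the ConstStmtVisitor print the node itself before recursing into
// S->children(), marking the final child for the tree connectors.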
void ASTDumper::dumpStmt(const Stmt *S) {
IndentScope Indent(*this);
if (!S) {
ColorScope Color(*this, NullColor);
OS << "<<<NULL>>>";
return;
}
if (const DeclStmt *DS = dyn_cast<DeclStmt>(S)) {
VisitDeclStmt(DS);
return;
}
setMoreChildren(!S->children().empty());
ConstStmtVisitor<ASTDumper>::Visit(S);
setMoreChildren(false);
for (Stmt::const_child_range CI = S->children(); CI; ++CI) {
Stmt::const_child_range Next = CI;
++Next;
if (!Next)
lastChild();
dumpStmt(*CI);
}
}
void ASTDumper::VisitStmt(const Stmt *Node) {
{
ColorScope Color(*this, StmtColor);
OS << Node->getStmtClassName();
}
dumpPointer(Node);
dumpSourceRange(Node->getSourceRange());
}
void ASTDumper::VisitDeclStmt(const DeclStmt *Node) {
VisitStmt(Node);
for (DeclStmt::const_decl_iterator I = Node->decl_begin(),
E = Node->decl_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpDecl(*I);
}
}
void ASTDumper::VisitAttributedStmt(const AttributedStmt *Node) {
VisitStmt(Node);
for (ArrayRef<const Attr *>::iterator I = Node->getAttrs().begin(),
E = Node->getAttrs().end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpAttr(*I);
}
}
void ASTDumper::VisitLabelStmt(const LabelStmt *Node) {
VisitStmt(Node);
OS << " '" << Node->getName() << "'";
}
void ASTDumper::VisitGotoStmt(const GotoStmt *Node) {
VisitStmt(Node);
OS << " '" << Node->getLabel()->getName() << "'";
dumpPointer(Node->getLabel());
}
void ASTDumper::VisitCXXCatchStmt(const CXXCatchStmt *Node) {
VisitStmt(Node);
dumpDecl(Node->getExceptionDecl());
}
//===----------------------------------------------------------------------===//
// Expr dumping methods.
//===----------------------------------------------------------------------===//
void ASTDumper::VisitExpr(const Expr *Node) {
VisitStmt(Node);
dumpType(Node->getType());
{
ColorScope Color(*this, ValueKindColor);
switch (Node->getValueKind()) {
case VK_RValue:
break;
case VK_LValue:
OS << " lvalue";
break;
case VK_XValue:
OS << " xvalue";
break;
}
}
{
ColorScope Color(*this, ObjectKindColor);
switch (Node->getObjectKind()) {
case OK_Ordinary:
break;
case OK_BitField:
OS << " bitfield";
break;
case OK_ObjCProperty:
OS << " objcproperty";
break;
case OK_ObjCSubscript:
OS << " objcsubscript";
break;
case OK_VectorComponent:
OS << " vectorcomponent";
break;
}
}
}
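// dumpBasePath prints the inheritance path recorded in a CastExpr. As an
// illustrative example, an implicit Derived-to-Base conversion might render
// as "<DerivedToBase (virtual Base)>", where the parenthesized part is
// produced here.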
static void dumpBasePath(raw_ostream &OS, const CastExpr *Node) {
if (Node->path_empty())
return;
OS << " (";
bool First = true;
for (CastExpr::path_const_iterator I = Node->path_begin(),
E = Node->path_end();
I != E; ++I) {
const CXXBaseSpecifier *Base = *I;
if (!First)
OS << " -> ";
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
if (Base->isVirtual())
OS << "virtual ";
OS << RD->getName();
First = false;
}
OS << ')';
}
void ASTDumper::VisitCastExpr(const CastExpr *Node) {
VisitExpr(Node);
OS << " <";
{
ColorScope Color(*this, CastColor);
OS << Node->getCastKindName();
}
dumpBasePath(OS, Node);
OS << ">";
}
void ASTDumper::VisitDeclRefExpr(const DeclRefExpr *Node) {
VisitExpr(Node);
OS << " ";
dumpBareDeclRef(Node->getDecl());
if (Node->getDecl() != Node->getFoundDecl()) {
OS << " (";
dumpBareDeclRef(Node->getFoundDecl());
OS << ")";
}
}
void ASTDumper::VisitUnresolvedLookupExpr(const UnresolvedLookupExpr *Node) {
VisitExpr(Node);
OS << " (";
if (!Node->requiresADL())
OS << "no ";
OS << "ADL) = '" << Node->getName() << '\'';
UnresolvedLookupExpr::decls_iterator
I = Node->decls_begin(), E = Node->decls_end();
if (I == E)
OS << " empty";
for (; I != E; ++I)
dumpPointer(*I);
}
void ASTDumper::VisitObjCIvarRefExpr(const ObjCIvarRefExpr *Node) {
VisitExpr(Node);
{
ColorScope Color(*this, DeclKindNameColor);
OS << " " << Node->getDecl()->getDeclKindName() << "Decl";
}
OS << "='" << *Node->getDecl() << "'";
dumpPointer(Node->getDecl());
if (Node->isFreeIvar())
OS << " isFreeIvar";
}
void ASTDumper::VisitPredefinedExpr(const PredefinedExpr *Node) {
VisitExpr(Node);
switch (Node->getIdentType()) {
default: llvm_unreachable("unknown case");
case PredefinedExpr::Func: OS << " __func__"; break;
case PredefinedExpr::Function: OS << " __FUNCTION__"; break;
case PredefinedExpr::FuncDName: OS << " __FUNCDNAME__"; break;
case PredefinedExpr::LFunction: OS << " L__FUNCTION__"; break;
case PredefinedExpr::PrettyFunction: OS << " __PRETTY_FUNCTION__"; break;
}
}
void ASTDumper::VisitCharacterLiteral(const CharacterLiteral *Node) {
VisitExpr(Node);
ColorScope Color(*this, ValueColor);
OS << " " << Node->getValue();
}
void ASTDumper::VisitIntegerLiteral(const IntegerLiteral *Node) {
VisitExpr(Node);
bool isSigned = Node->getType()->isSignedIntegerType();
ColorScope Color(*this, ValueColor);
OS << " " << Node->getValue().toString(10, isSigned);
}
void ASTDumper::VisitFloatingLiteral(const FloatingLiteral *Node) {
VisitExpr(Node);
ColorScope Color(*this, ValueColor);
OS << " " << Node->getValueAsApproximateDouble();
}
void ASTDumper::VisitStringLiteral(const StringLiteral *Str) {
VisitExpr(Str);
ColorScope Color(*this, ValueColor);
OS << " ";
Str->outputString(OS);
}
void ASTDumper::VisitUnaryOperator(const UnaryOperator *Node) {
VisitExpr(Node);
OS << " " << (Node->isPostfix() ? "postfix" : "prefix")
<< " '" << UnaryOperator::getOpcodeStr(Node->getOpcode()) << "'";
}
void ASTDumper::VisitUnaryExprOrTypeTraitExpr(
const UnaryExprOrTypeTraitExpr *Node) {
VisitExpr(Node);
switch (Node->getKind()) {
case UETT_SizeOf:
OS << " sizeof";
break;
case UETT_AlignOf:
OS << " alignof";
break;
case UETT_VecStep:
OS << " vec_step";
break;
}
if (Node->isArgumentType())
dumpType(Node->getArgumentType());
}
void ASTDumper::VisitMemberExpr(const MemberExpr *Node) {
VisitExpr(Node);
OS << " " << (Node->isArrow() ? "->" : ".") << *Node->getMemberDecl();
dumpPointer(Node->getMemberDecl());
}
void ASTDumper::VisitExtVectorElementExpr(const ExtVectorElementExpr *Node) {
VisitExpr(Node);
OS << " " << Node->getAccessor().getNameStart();
}
void ASTDumper::VisitBinaryOperator(const BinaryOperator *Node) {
VisitExpr(Node);
OS << " '" << BinaryOperator::getOpcodeStr(Node->getOpcode()) << "'";
}
void ASTDumper::VisitCompoundAssignOperator(
const CompoundAssignOperator *Node) {
VisitExpr(Node);
OS << " '" << BinaryOperator::getOpcodeStr(Node->getOpcode())
<< "' ComputeLHSTy=";
dumpBareType(Node->getComputationLHSType());
OS << " ComputeResultTy=";
dumpBareType(Node->getComputationResultType());
}
void ASTDumper::VisitBlockExpr(const BlockExpr *Node) {
VisitExpr(Node);
dumpDecl(Node->getBlockDecl());
}
void ASTDumper::VisitOpaqueValueExpr(const OpaqueValueExpr *Node) {
VisitExpr(Node);
if (Expr *Source = Node->getSourceExpr()) {
lastChild();
dumpStmt(Source);
}
}
// GNU extensions.
void ASTDumper::VisitAddrLabelExpr(const AddrLabelExpr *Node) {
VisitExpr(Node);
OS << " " << Node->getLabel()->getName();
dumpPointer(Node->getLabel());
}
//===----------------------------------------------------------------------===//
// C++ Expressions
//===----------------------------------------------------------------------===//
void ASTDumper::VisitCXXNamedCastExpr(const CXXNamedCastExpr *Node) {
VisitExpr(Node);
OS << " " << Node->getCastName()
<< "<" << Node->getTypeAsWritten().getAsString() << ">"
<< " <" << Node->getCastKindName();
dumpBasePath(OS, Node);
OS << ">";
}
void ASTDumper::VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *Node) {
VisitExpr(Node);
OS << " " << (Node->getValue() ? "true" : "false");
}
void ASTDumper::VisitCXXThisExpr(const CXXThisExpr *Node) {
VisitExpr(Node);
OS << " this";
}
void ASTDumper::VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *Node) {
VisitExpr(Node);
OS << " functional cast to " << Node->getTypeAsWritten().getAsString()
<< " <" << Node->getCastKindName() << ">";
}
void ASTDumper::VisitCXXConstructExpr(const CXXConstructExpr *Node) {
VisitExpr(Node);
CXXConstructorDecl *Ctor = Node->getConstructor();
dumpType(Ctor->getType());
if (Node->isElidable())
OS << " elidable";
if (Node->requiresZeroInitialization())
OS << " zeroing";
}
void ASTDumper::VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Node) {
VisitExpr(Node);
OS << " ";
dumpCXXTemporary(Node->getTemporary());
}
void
ASTDumper::VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Node) {
VisitExpr(Node);
if (const ValueDecl *VD = Node->getExtendingDecl()) {
OS << " extended by ";
dumpBareDeclRef(VD);
}
}
void ASTDumper::VisitExprWithCleanups(const ExprWithCleanups *Node) {
VisitExpr(Node);
for (unsigned i = 0, e = Node->getNumObjects(); i != e; ++i)
dumpDeclRef(Node->getObject(i), "cleanup");
}
void ASTDumper::dumpCXXTemporary(const CXXTemporary *Temporary) {
OS << "(CXXTemporary";
dumpPointer(Temporary);
OS << ")";
}
//===----------------------------------------------------------------------===//
// Obj-C Expressions
//===----------------------------------------------------------------------===//
void ASTDumper::VisitObjCMessageExpr(const ObjCMessageExpr *Node) {
VisitExpr(Node);
OS << " selector=" << Node->getSelector().getAsString();
switch (Node->getReceiverKind()) {
case ObjCMessageExpr::Instance:
break;
case ObjCMessageExpr::Class:
OS << " class=";
dumpBareType(Node->getClassReceiver());
break;
case ObjCMessageExpr::SuperInstance:
OS << " super (instance)";
break;
case ObjCMessageExpr::SuperClass:
OS << " super (class)";
break;
}
}
void ASTDumper::VisitObjCBoxedExpr(const ObjCBoxedExpr *Node) {
VisitExpr(Node);
OS << " selector=" << Node->getBoxingMethod()->getSelector().getAsString();
}
void ASTDumper::VisitObjCAtCatchStmt(const ObjCAtCatchStmt *Node) {
VisitStmt(Node);
if (const VarDecl *CatchParam = Node->getCatchParamDecl())
dumpDecl(CatchParam);
else
OS << " catch all";
}
void ASTDumper::VisitObjCEncodeExpr(const ObjCEncodeExpr *Node) {
VisitExpr(Node);
dumpType(Node->getEncodedType());
}
void ASTDumper::VisitObjCSelectorExpr(const ObjCSelectorExpr *Node) {
VisitExpr(Node);
OS << " " << Node->getSelector().getAsString();
}
void ASTDumper::VisitObjCProtocolExpr(const ObjCProtocolExpr *Node) {
VisitExpr(Node);
OS << ' ' << *Node->getProtocol();
}
void ASTDumper::VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *Node) {
VisitExpr(Node);
if (Node->isImplicitProperty()) {
OS << " Kind=MethodRef Getter=\"";
if (Node->getImplicitPropertyGetter())
OS << Node->getImplicitPropertyGetter()->getSelector().getAsString();
else
OS << "(null)";
OS << "\" Setter=\"";
if (ObjCMethodDecl *Setter = Node->getImplicitPropertySetter())
OS << Setter->getSelector().getAsString();
else
OS << "(null)";
OS << "\"";
} else {
OS << " Kind=PropertyRef Property=\"" << *Node->getExplicitProperty() <<'"';
}
if (Node->isSuperReceiver())
OS << " super";
OS << " Messaging=";
if (Node->isMessagingGetter() && Node->isMessagingSetter())
OS << "Getter&Setter";
else if (Node->isMessagingGetter())
OS << "Getter";
else if (Node->isMessagingSetter())
OS << "Setter";
}
void ASTDumper::VisitObjCSubscriptRefExpr(const ObjCSubscriptRefExpr *Node) {
VisitExpr(Node);
if (Node->isArraySubscriptRefExpr())
OS << " Kind=ArraySubscript GetterForArray=\"";
else
OS << " Kind=DictionarySubscript GetterForDictionary=\"";
if (Node->getAtIndexMethodDecl())
OS << Node->getAtIndexMethodDecl()->getSelector().getAsString();
else
OS << "(null)";
if (Node->isArraySubscriptRefExpr())
OS << "\" SetterForArray=\"";
else
OS << "\" SetterForDictionary=\"";
if (Node->setAtIndexMethodDecl())
OS << Node->setAtIndexMethodDecl()->getSelector().getAsString();
else
OS << "(null)";
}
void ASTDumper::VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *Node) {
VisitExpr(Node);
OS << " " << (Node->getValue() ? "__objc_yes" : "__objc_no");
}
//===----------------------------------------------------------------------===//
// Comments
//===----------------------------------------------------------------------===//
const char *ASTDumper::getCommandName(unsigned CommandID) {
if (Traits)
return Traits->getCommandInfo(CommandID)->Name;
const CommandInfo *Info = CommandTraits::getBuiltinCommandInfo(CommandID);
if (Info)
return Info->Name;
return "<not a builtin command>";
}
void ASTDumper::dumpFullComment(const FullComment *C) {
if (!C)
return;
FC = C;
dumpComment(C);
FC = 0;
}
void ASTDumper::dumpComment(const Comment *C) {
IndentScope Indent(*this);
if (!C) {
ColorScope Color(*this, NullColor);
OS << "<<<NULL>>>";
return;
}
{
ColorScope Color(*this, CommentColor);
OS << C->getCommentKindName();
}
dumpPointer(C);
dumpSourceRange(C->getSourceRange());
ConstCommentVisitor<ASTDumper>::visit(C);
for (Comment::child_iterator I = C->child_begin(), E = C->child_end();
I != E; ++I) {
if (I + 1 == E)
lastChild();
dumpComment(*I);
}
}
void ASTDumper::visitTextComment(const TextComment *C) {
OS << " Text=\"" << C->getText() << "\"";
}
void ASTDumper::visitInlineCommandComment(const InlineCommandComment *C) {
OS << " Name=\"" << getCommandName(C->getCommandID()) << "\"";
switch (C->getRenderKind()) {
case InlineCommandComment::RenderNormal:
OS << " RenderNormal";
break;
case InlineCommandComment::RenderBold:
OS << " RenderBold";
break;
case InlineCommandComment::RenderMonospaced:
OS << " RenderMonospaced";
break;
case InlineCommandComment::RenderEmphasized:
OS << " RenderEmphasized";
break;
}
for (unsigned i = 0, e = C->getNumArgs(); i != e; ++i)
OS << " Arg[" << i << "]=\"" << C->getArgText(i) << "\"";
}
void ASTDumper::visitHTMLStartTagComment(const HTMLStartTagComment *C) {
OS << " Name=\"" << C->getTagName() << "\"";
if (C->getNumAttrs() != 0) {
OS << " Attrs: ";
for (unsigned i = 0, e = C->getNumAttrs(); i != e; ++i) {
const HTMLStartTagComment::Attribute &Attr = C->getAttr(i);
OS << " \"" << Attr.Name << "=\"" << Attr.Value << "\"";
}
}
if (C->isSelfClosing())
OS << " SelfClosing";
}
void ASTDumper::visitHTMLEndTagComment(const HTMLEndTagComment *C) {
OS << " Name=\"" << C->getTagName() << "\"";
}
void ASTDumper::visitBlockCommandComment(const BlockCommandComment *C) {
OS << " Name=\"" << getCommandName(C->getCommandID()) << "\"";
for (unsigned i = 0, e = C->getNumArgs(); i != e; ++i)
OS << " Arg[" << i << "]=\"" << C->getArgText(i) << "\"";
}
void ASTDumper::visitParamCommandComment(const ParamCommandComment *C) {
OS << " " << ParamCommandComment::getDirectionAsString(C->getDirection());
if (C->isDirectionExplicit())
OS << " explicitly";
else
OS << " implicitly";
if (C->hasParamName()) {
if (C->isParamIndexValid())
OS << " Param=\"" << C->getParamName(FC) << "\"";
else
OS << " Param=\"" << C->getParamNameAsWritten() << "\"";
}
if (C->isParamIndexValid())
OS << " ParamIndex=" << C->getParamIndex();
}
void ASTDumper::visitTParamCommandComment(const TParamCommandComment *C) {
if (C->hasParamName()) {
if (C->isPositionValid())
OS << " Param=\"" << C->getParamName(FC) << "\"";
else
OS << " Param=\"" << C->getParamNameAsWritten() << "\"";
}
if (C->isPositionValid()) {
OS << " Position=<";
for (unsigned i = 0, e = C->getDepth(); i != e; ++i) {
OS << C->getIndex(i);
if (i != e - 1)
OS << ", ";
}
OS << ">";
}
}
void ASTDumper::visitVerbatimBlockComment(const VerbatimBlockComment *C) {
OS << " Name=\"" << getCommandName(C->getCommandID()) << "\""
" CloseName=\"" << C->getCloseName() << "\"";
}
void ASTDumper::visitVerbatimBlockLineComment(
const VerbatimBlockLineComment *C) {
OS << " Text=\"" << C->getText() << "\"";
}
void ASTDumper::visitVerbatimLineComment(const VerbatimLineComment *C) {
OS << " Text=\"" << C->getText() << "\"";
}
//===----------------------------------------------------------------------===//
// Decl method implementations
//===----------------------------------------------------------------------===//
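// These are the out-of-line entry points intended for use from a debugger:
// for example, "D->dump()" prints the subtree rooted at D to llvm::errs(),
// and dumpColor() does the same with ANSI color escapes enabled.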
void Decl::dump() const {
dump(llvm::errs());
}
void Decl::dump(raw_ostream &OS) const {
ASTDumper P(OS, &getASTContext().getCommentCommandTraits(),
&getASTContext().getSourceManager());
P.dumpDecl(this);
}
void Decl::dumpColor() const {
ASTDumper P(llvm::errs(), &getASTContext().getCommentCommandTraits(),
&getASTContext().getSourceManager(), /*ShowColors*/true);
P.dumpDecl(this);
}
void DeclContext::dumpLookups() const {
dumpLookups(llvm::errs());
}
void DeclContext::dumpLookups(raw_ostream &OS) const {
const DeclContext *DC = this;
while (!DC->isTranslationUnit())
DC = DC->getParent();
ASTContext &Ctx = cast<TranslationUnitDecl>(DC)->getASTContext();
ASTDumper P(OS, &Ctx.getCommentCommandTraits(), &Ctx.getSourceManager());
P.dumpLookups(this);
}
//===----------------------------------------------------------------------===//
// Stmt method implementations
//===----------------------------------------------------------------------===//
void Stmt::dump(SourceManager &SM) const {
dump(llvm::errs(), SM);
}
void Stmt::dump(raw_ostream &OS, SourceManager &SM) const {
ASTDumper P(OS, 0, &SM);
P.dumpStmt(this);
}
void Stmt::dump() const {
ASTDumper P(llvm::errs(), 0, 0);
P.dumpStmt(this);
}
void Stmt::dumpColor() const {
ASTDumper P(llvm::errs(), 0, 0, /*ShowColors*/true);
P.dumpStmt(this);
}
//===----------------------------------------------------------------------===//
// Comment method implementations
//===----------------------------------------------------------------------===//
void Comment::dump() const {
dump(llvm::errs(), 0, 0);
}
void Comment::dump(const ASTContext &Context) const {
dump(llvm::errs(), &Context.getCommentCommandTraits(),
&Context.getSourceManager());
}
void Comment::dump(raw_ostream &OS, const CommandTraits *Traits,
const SourceManager *SM) const {
const FullComment *FC = dyn_cast<FullComment>(this);
ASTDumper D(OS, Traits, SM);
D.dumpFullComment(FC);
}
void Comment::dumpColor() const {
const FullComment *FC = dyn_cast<FullComment>(this);
ASTDumper D(llvm::errs(), 0, 0, /*ShowColors*/true);
D.dumpFullComment(FC);
}
Index: head/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp (revision 265925)
@@ -1,8707 +1,8696 @@
//===--- ExprConstant.cpp - Expression Constant Evaluator -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Expr constant evaluator.
//
// Constant expression evaluation produces four main results:
//
// * A success/failure flag indicating whether constant folding was successful.
// This is the 'bool' return value used by most of the code in this file. A
// 'false' return value indicates that constant folding has failed, and any
// appropriate diagnostic has already been produced.
//
// * An evaluated result, valid only if constant folding has not failed.
//
// * A flag indicating if evaluation encountered (unevaluated) side-effects.
// These arise in cases such as (sideEffect(), 0) and (sideEffect() || 1),
// where it is possible to determine the evaluated result regardless.
//
// * A set of notes indicating why the evaluation was not a constant expression
// (under the C++11 / C++1y rules only, at the moment), or, if folding failed
// too, why the expression could not be folded.
//
// If we are checking for a potential constant expression, failure to constant
// fold a potential constant sub-expression will be indicated by a 'false'
// return value (the expression could not be folded) and no diagnostic (the
// expression is not necessarily non-constant).
//
//===----------------------------------------------------------------------===//
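// A minimal caller-side sketch of consuming the four results listed above,
// assuming the public Expr entry points (Expr::EvalResult bundles the
// evaluated value, the side-effect flag, and an optional note list):
//
//   SmallVector<PartialDiagnosticAt, 8> Notes;
//   Expr::EvalResult Result;
//   Result.Diag = &Notes;                    // collect "why not" notes
//   if (E->EvaluateAsRValue(Result, Ctx)) {  // success/failure flag
//     consume(Result.Val);                   // the evaluated APValue
//     if (Result.HasSideEffects) { ... }     // unevaluated side-effects
//   }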
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTDiagnostic.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <functional>
using namespace clang;
using llvm::APSInt;
using llvm::APFloat;
static bool IsGlobalLValue(APValue::LValueBase B);
namespace {
struct LValue;
struct CallStackFrame;
struct EvalInfo;
static QualType getType(APValue::LValueBase B) {
if (!B) return QualType();
if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>())
return D->getType();
const Expr *Base = B.get<const Expr*>();
// For a materialized temporary, the type of the temporary we materialized
// may not be the type of the expression.
if (const MaterializeTemporaryExpr *MTE =
dyn_cast<MaterializeTemporaryExpr>(Base)) {
SmallVector<const Expr *, 2> CommaLHSs;
SmallVector<SubobjectAdjustment, 2> Adjustments;
const Expr *Temp = MTE->GetTemporaryExpr();
const Expr *Inner = Temp->skipRValueSubobjectAdjustments(CommaLHSs,
Adjustments);
// Keep any cv-qualifiers from the reference if we generated a temporary
// for it.
if (Inner != Temp)
return Inner->getType();
}
return Base->getType();
}
/// Get an LValue path entry, which is known to not be an array index, as a
/// field or base class.
static
APValue::BaseOrMemberType getAsBaseOrMember(APValue::LValuePathEntry E) {
APValue::BaseOrMemberType Value;
Value.setFromOpaqueValue(E.BaseOrMember);
return Value;
}
/// Get an LValue path entry, which is known to not be an array index, as a
/// field declaration.
static const FieldDecl *getAsField(APValue::LValuePathEntry E) {
return dyn_cast<FieldDecl>(getAsBaseOrMember(E).getPointer());
}
/// Get an LValue path entry, which is known to not be an array index, as a
/// base class declaration.
static const CXXRecordDecl *getAsBaseClass(APValue::LValuePathEntry E) {
return dyn_cast<CXXRecordDecl>(getAsBaseOrMember(E).getPointer());
}
/// Determine whether this LValue path entry for a base class names a virtual
/// base class.
static bool isVirtualBaseClass(APValue::LValuePathEntry E) {
return getAsBaseOrMember(E).getInt();
}
/// Find the path length and type of the most-derived subobject in the given
/// path, and find the size of the containing array, if any.
static
unsigned findMostDerivedSubobject(ASTContext &Ctx, QualType Base,
ArrayRef<APValue::LValuePathEntry> Path,
uint64_t &ArraySize, QualType &Type) {
unsigned MostDerivedLength = 0;
Type = Base;
for (unsigned I = 0, N = Path.size(); I != N; ++I) {
if (Type->isArrayType()) {
const ConstantArrayType *CAT =
cast<ConstantArrayType>(Ctx.getAsArrayType(Type));
Type = CAT->getElementType();
ArraySize = CAT->getSize().getZExtValue();
MostDerivedLength = I + 1;
} else if (Type->isAnyComplexType()) {
const ComplexType *CT = Type->castAs<ComplexType>();
Type = CT->getElementType();
ArraySize = 2;
MostDerivedLength = I + 1;
} else if (const FieldDecl *FD = getAsField(Path[I])) {
Type = FD->getType();
ArraySize = 0;
MostDerivedLength = I + 1;
} else {
// Path[I] describes a base class.
ArraySize = 0;
}
}
return MostDerivedLength;
}
// The order of this enum is important for diagnostics.
enum CheckSubobjectKind {
CSK_Base, CSK_Derived, CSK_Field, CSK_ArrayToPointer, CSK_ArrayIndex,
CSK_This, CSK_Real, CSK_Imag
};
/// A path from a glvalue to a subobject of that glvalue.
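/// As an illustrative example (not from the original source): given
///   struct S { int a[5]; } s;
/// the designator for &s.a[3] holds Entries = {a, 3}, with MostDerivedType
/// 'int', MostDerivedArraySize 5 and MostDerivedPathLength 2, so that
/// adjustIndex can bounds-check further pointer arithmetic against the
/// array size.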
struct SubobjectDesignator {
/// True if the subobject was named in a manner not supported by C++11. Such
/// lvalues can still be folded, but they are not core constant expressions
/// and we cannot perform lvalue-to-rvalue conversions on them.
bool Invalid : 1;
/// Is this a pointer one past the end of an object?
bool IsOnePastTheEnd : 1;
/// The length of the path to the most-derived object of which this is a
/// subobject.
unsigned MostDerivedPathLength : 30;
/// The size of the array of which the most-derived object is an element, or
/// 0 if the most-derived object is not an array element.
uint64_t MostDerivedArraySize;
/// The type of the most derived object referred to by this address.
QualType MostDerivedType;
typedef APValue::LValuePathEntry PathEntry;
/// The entries on the path from the glvalue to the designated subobject.
SmallVector<PathEntry, 8> Entries;
SubobjectDesignator() : Invalid(true) {}
explicit SubobjectDesignator(QualType T)
: Invalid(false), IsOnePastTheEnd(false), MostDerivedPathLength(0),
MostDerivedArraySize(0), MostDerivedType(T) {}
SubobjectDesignator(ASTContext &Ctx, const APValue &V)
: Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false),
MostDerivedPathLength(0), MostDerivedArraySize(0) {
if (!Invalid) {
IsOnePastTheEnd = V.isLValueOnePastTheEnd();
ArrayRef<PathEntry> VEntries = V.getLValuePath();
Entries.insert(Entries.end(), VEntries.begin(), VEntries.end());
if (V.getLValueBase())
MostDerivedPathLength =
findMostDerivedSubobject(Ctx, getType(V.getLValueBase()),
V.getLValuePath(), MostDerivedArraySize,
MostDerivedType);
}
}
void setInvalid() {
Invalid = true;
Entries.clear();
}
/// Determine whether this is a one-past-the-end pointer.
bool isOnePastTheEnd() const {
if (IsOnePastTheEnd)
return true;
if (MostDerivedArraySize &&
Entries[MostDerivedPathLength - 1].ArrayIndex == MostDerivedArraySize)
return true;
return false;
}
/// Check that this refers to a valid subobject.
bool isValidSubobject() const {
if (Invalid)
return false;
return !isOnePastTheEnd();
}
/// Check that this refers to a valid subobject, and if not, produce a
/// relevant diagnostic and set the designator as invalid.
bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK);
/// Update this designator to refer to the first element within this array.
void addArrayUnchecked(const ConstantArrayType *CAT) {
PathEntry Entry;
Entry.ArrayIndex = 0;
Entries.push_back(Entry);
// This is a most-derived object.
MostDerivedType = CAT->getElementType();
MostDerivedArraySize = CAT->getSize().getZExtValue();
MostDerivedPathLength = Entries.size();
}
/// Update this designator to refer to the given base or member of this
/// object.
void addDeclUnchecked(const Decl *D, bool Virtual = false) {
PathEntry Entry;
APValue::BaseOrMemberType Value(D, Virtual);
Entry.BaseOrMember = Value.getOpaqueValue();
Entries.push_back(Entry);
// If this isn't a base class, it's a new most-derived object.
if (const FieldDecl *FD = dyn_cast<FieldDecl>(D)) {
MostDerivedType = FD->getType();
MostDerivedArraySize = 0;
MostDerivedPathLength = Entries.size();
}
}
/// Update this designator to refer to the given complex component.
void addComplexUnchecked(QualType EltTy, bool Imag) {
PathEntry Entry;
Entry.ArrayIndex = Imag;
Entries.push_back(Entry);
// This is technically a most-derived object, though in practice this
// is unlikely to matter.
MostDerivedType = EltTy;
MostDerivedArraySize = 2;
MostDerivedPathLength = Entries.size();
}
void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, uint64_t N);
/// Add N to the address of this subobject.
void adjustIndex(EvalInfo &Info, const Expr *E, uint64_t N) {
if (Invalid) return;
if (MostDerivedPathLength == Entries.size() && MostDerivedArraySize) {
Entries.back().ArrayIndex += N;
if (Entries.back().ArrayIndex > MostDerivedArraySize) {
diagnosePointerArithmetic(Info, E, Entries.back().ArrayIndex);
setInvalid();
}
return;
}
// [expr.add]p4: For the purposes of these operators, a pointer to a
// nonarray object behaves the same as a pointer to the first element of
// an array of length one with the type of the object as its element type.
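// For example, with "int x;" the pointer "&x + 1" is a valid
// one-past-the-end pointer, while "&x + 2" is diagnosed below.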
if (IsOnePastTheEnd && N == (uint64_t)-1)
IsOnePastTheEnd = false;
else if (!IsOnePastTheEnd && N == 1)
IsOnePastTheEnd = true;
else if (N != 0) {
diagnosePointerArithmetic(Info, E, uint64_t(IsOnePastTheEnd) + N);
setInvalid();
}
}
};
/// A stack frame in the constexpr call stack.
struct CallStackFrame {
EvalInfo &Info;
/// Parent - The caller of this stack frame.
CallStackFrame *Caller;
/// CallLoc - The location of the call expression for this call.
SourceLocation CallLoc;
/// Callee - The function which was called.
const FunctionDecl *Callee;
/// Index - The call index of this call.
unsigned Index;
/// This - The binding for the this pointer in this call, if any.
const LValue *This;
/// Arguments - Parameter bindings for this function call, indexed by
/// parameters' function scope indices.
APValue *Arguments;
// Note that we intentionally use std::map here so that references to
// values are stable.
typedef std::map<const void*, APValue> MapTy;
typedef MapTy::const_iterator temp_iterator;
/// Temporaries - Temporary lvalues materialized within this stack frame.
MapTy Temporaries;
CallStackFrame(EvalInfo &Info, SourceLocation CallLoc,
const FunctionDecl *Callee, const LValue *This,
APValue *Arguments);
~CallStackFrame();
APValue *getTemporary(const void *Key) {
MapTy::iterator I = Temporaries.find(Key);
return I == Temporaries.end() ? 0 : &I->second;
}
APValue &createTemporary(const void *Key, bool IsLifetimeExtended);
};
/// Temporarily override 'this'.
class ThisOverrideRAII {
public:
ThisOverrideRAII(CallStackFrame &Frame, const LValue *NewThis, bool Enable)
: Frame(Frame), OldThis(Frame.This) {
if (Enable)
Frame.This = NewThis;
}
~ThisOverrideRAII() {
Frame.This = OldThis;
}
private:
CallStackFrame &Frame;
const LValue *OldThis;
};
/// A partial diagnostic which we might know in advance that we are not going
/// to emit.
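/// For example, "Info.CCEDiag(E) << N;" formats and stores N only when a
/// diagnostic is actually being collected; otherwise the streaming operators
/// below are no-ops.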
class OptionalDiagnostic {
PartialDiagnostic *Diag;
public:
explicit OptionalDiagnostic(PartialDiagnostic *Diag = 0) : Diag(Diag) {}
template<typename T>
OptionalDiagnostic &operator<<(const T &v) {
if (Diag)
*Diag << v;
return *this;
}
OptionalDiagnostic &operator<<(const APSInt &I) {
if (Diag) {
SmallVector<char, 32> Buffer;
I.toString(Buffer);
*Diag << StringRef(Buffer.data(), Buffer.size());
}
return *this;
}
OptionalDiagnostic &operator<<(const APFloat &F) {
if (Diag) {
// FIXME: Force the precision of the source value down so we don't
// print digits which are usually useless (we don't really care here if
// we truncate a digit by accident in edge cases). Ideally,
// APFloat::toString would automatically print the shortest
// representation which rounds to the correct value, but it's a bit
// tricky to implement.
unsigned precision =
llvm::APFloat::semanticsPrecision(F.getSemantics());
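// 59/196 is slightly below log10(2), so this converts the significand's
// bit count into a (rounded-up) count of decimal digits.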
precision = (precision * 59 + 195) / 196;
SmallVector<char, 32> Buffer;
F.toString(Buffer, precision);
*Diag << StringRef(Buffer.data(), Buffer.size());
}
return *this;
}
};
/// A cleanup, and a flag indicating whether it is lifetime-extended.
class Cleanup {
llvm::PointerIntPair<APValue*, 1, bool> Value;
public:
Cleanup(APValue *Val, bool IsLifetimeExtended)
: Value(Val, IsLifetimeExtended) {}
bool isLifetimeExtended() const { return Value.getInt(); }
void endLifetime() {
*Value.getPointer() = APValue();
}
};
/// EvalInfo - This is a private struct used by the evaluator to capture
/// information about a subexpression as it is folded. It retains information
/// about the AST context, but also maintains information about the folded
/// expression.
///
/// If an expression could be evaluated, it is still possible it is not a C
/// "integer constant expression" or constant expression. If not, this struct
/// captures information about how and why not.
///
/// One bit of information passed *into* the request for constant folding
/// indicates whether the subexpression is "evaluated" or not according to C
/// rules. For example, the RHS of (0 && foo()) is not evaluated. We can
/// evaluate the expression regardless of what the RHS is, but C only allows
/// certain things in certain situations.
struct EvalInfo {
ASTContext &Ctx;
/// EvalStatus - Contains information about the evaluation.
Expr::EvalStatus &EvalStatus;
/// CurrentCall - The top of the constexpr call stack.
CallStackFrame *CurrentCall;
/// CallStackDepth - The number of calls in the call stack right now.
unsigned CallStackDepth;
/// NextCallIndex - The next call index to assign.
unsigned NextCallIndex;
/// StepsLeft - The remaining number of evaluation steps we're permitted
/// to perform. This is essentially a limit for the number of statements
/// we will evaluate.
unsigned StepsLeft;
/// BottomFrame - The frame in which evaluation started. This must be
/// initialized after CurrentCall and CallStackDepth.
CallStackFrame BottomFrame;
/// A stack of values whose lifetimes end at the end of some surrounding
/// evaluation frame.
llvm::SmallVector<Cleanup, 16> CleanupStack;
/// EvaluatingDecl - This is the declaration whose initializer is being
/// evaluated, if any.
APValue::LValueBase EvaluatingDecl;
/// EvaluatingDeclValue - This is the value being constructed for the
/// declaration whose initializer is being evaluated, if any.
APValue *EvaluatingDeclValue;
/// HasActiveDiagnostic - Was the previous diagnostic stored? If so, further
/// notes attached to it will also be stored, otherwise they will not be.
bool HasActiveDiagnostic;
enum EvaluationMode {
/// Evaluate as a constant expression. Stop if we find that the expression
/// is not a constant expression.
EM_ConstantExpression,
/// Evaluate as a potential constant expression. Keep going if we hit a
/// construct that we can't evaluate yet (because we don't yet know the
/// value of something) but stop if we hit something that could never be
/// a constant expression.
EM_PotentialConstantExpression,
/// Fold the expression to a constant. Stop if we hit a side-effect that
/// we can't model.
EM_ConstantFold,
/// Evaluate the expression looking for integer overflow and similar
/// issues. Don't worry about side-effects, and try to visit all
/// subexpressions.
EM_EvaluateForOverflow,
/// Evaluate in any way we know how. Don't worry about side-effects that
/// can't be modeled.
EM_IgnoreSideEffects
} EvalMode;
/// Are we checking whether the expression is a potential constant
/// expression?
bool checkingPotentialConstantExpression() const {
return EvalMode == EM_PotentialConstantExpression;
}
/// Are we checking an expression for overflow?
// FIXME: We should check for any kind of undefined or suspicious behavior
// in such constructs, not just overflow.
bool checkingForOverflow() { return EvalMode == EM_EvaluateForOverflow; }
EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode)
: Ctx(const_cast<ASTContext&>(C)), EvalStatus(S), CurrentCall(0),
CallStackDepth(0), NextCallIndex(1),
StepsLeft(getLangOpts().ConstexprStepLimit),
BottomFrame(*this, SourceLocation(), 0, 0, 0),
EvaluatingDecl((const ValueDecl*)0), EvaluatingDeclValue(0),
HasActiveDiagnostic(false), EvalMode(Mode) {}
void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value) {
EvaluatingDecl = Base;
EvaluatingDeclValue = &Value;
}
const LangOptions &getLangOpts() const { return Ctx.getLangOpts(); }
bool CheckCallLimit(SourceLocation Loc) {
// Don't perform any constexpr calls (other than the call we're checking)
// when checking a potential constant expression.
if (checkingPotentialConstantExpression() && CallStackDepth > 1)
return false;
if (NextCallIndex == 0) {
// NextCallIndex has wrapped around.
Diag(Loc, diag::note_constexpr_call_limit_exceeded);
return false;
}
if (CallStackDepth <= getLangOpts().ConstexprCallDepth)
return true;
Diag(Loc, diag::note_constexpr_depth_limit_exceeded)
<< getLangOpts().ConstexprCallDepth;
return false;
}
CallStackFrame *getCallFrame(unsigned CallIndex) {
assert(CallIndex && "no call index in getCallFrame");
// We will eventually hit BottomFrame, which has Index 1, so Frame can't
// be null in this loop.
CallStackFrame *Frame = CurrentCall;
while (Frame->Index > CallIndex)
Frame = Frame->Caller;
return (Frame->Index == CallIndex) ? Frame : 0;
}
bool nextStep(const Stmt *S) {
if (!StepsLeft) {
Diag(S->getLocStart(), diag::note_constexpr_step_limit_exceeded);
return false;
}
--StepsLeft;
return true;
}
private:
/// Add a diagnostic to the diagnostics list.
PartialDiagnostic &addDiag(SourceLocation Loc, diag::kind DiagId) {
PartialDiagnostic PD(DiagId, Ctx.getDiagAllocator());
EvalStatus.Diag->push_back(std::make_pair(Loc, PD));
return EvalStatus.Diag->back().second;
}
/// Add notes containing a call stack to the current point of evaluation.
void addCallStack(unsigned Limit);
public:
/// Diagnose that the evaluation cannot be folded.
OptionalDiagnostic Diag(SourceLocation Loc, diag::kind DiagId
= diag::note_invalid_subexpr_in_const_expr,
unsigned ExtraNotes = 0) {
if (EvalStatus.Diag) {
// If we have a prior diagnostic, it will be noting that the expression
// isn't a constant expression. This diagnostic is more important,
// unless we require this evaluation to produce a constant expression.
//
// FIXME: We might want to show both diagnostics to the user in
// EM_ConstantFold mode.
if (!EvalStatus.Diag->empty()) {
switch (EvalMode) {
case EM_ConstantFold:
case EM_IgnoreSideEffects:
case EM_EvaluateForOverflow:
if (!EvalStatus.HasSideEffects)
break;
// We've had side-effects; we want the diagnostic from them, not
// some later problem.
case EM_ConstantExpression:
case EM_PotentialConstantExpression:
HasActiveDiagnostic = false;
return OptionalDiagnostic();
}
}
unsigned CallStackNotes = CallStackDepth - 1;
unsigned Limit = Ctx.getDiagnostics().getConstexprBacktraceLimit();
if (Limit)
CallStackNotes = std::min(CallStackNotes, Limit + 1);
if (checkingPotentialConstantExpression())
CallStackNotes = 0;
HasActiveDiagnostic = true;
EvalStatus.Diag->clear();
EvalStatus.Diag->reserve(1 + ExtraNotes + CallStackNotes);
addDiag(Loc, DiagId);
if (!checkingPotentialConstantExpression())
addCallStack(Limit);
return OptionalDiagnostic(&(*EvalStatus.Diag)[0].second);
}
HasActiveDiagnostic = false;
return OptionalDiagnostic();
}
OptionalDiagnostic Diag(const Expr *E, diag::kind DiagId
= diag::note_invalid_subexpr_in_const_expr,
unsigned ExtraNotes = 0) {
if (EvalStatus.Diag)
return Diag(E->getExprLoc(), DiagId, ExtraNotes);
HasActiveDiagnostic = false;
return OptionalDiagnostic();
}
/// Diagnose that the evaluation does not produce a C++11 core constant
/// expression.
///
/// FIXME: Stop evaluating if we're in EM_ConstantExpression or
/// EM_PotentialConstantExpression mode and we produce one of these.
template<typename LocArg>
OptionalDiagnostic CCEDiag(LocArg Loc, diag::kind DiagId
= diag::note_invalid_subexpr_in_const_expr,
unsigned ExtraNotes = 0) {
// Don't override a previous diagnostic. Don't bother collecting
// diagnostics if we're evaluating for overflow.
if (!EvalStatus.Diag || !EvalStatus.Diag->empty()) {
HasActiveDiagnostic = false;
return OptionalDiagnostic();
}
return Diag(Loc, DiagId, ExtraNotes);
}
/// Add a note to a prior diagnostic.
OptionalDiagnostic Note(SourceLocation Loc, diag::kind DiagId) {
if (!HasActiveDiagnostic)
return OptionalDiagnostic();
return OptionalDiagnostic(&addDiag(Loc, DiagId));
}
/// Add a stack of notes to a prior diagnostic.
void addNotes(ArrayRef<PartialDiagnosticAt> Diags) {
if (HasActiveDiagnostic) {
EvalStatus.Diag->insert(EvalStatus.Diag->end(),
Diags.begin(), Diags.end());
}
}
/// Should we continue evaluation after encountering a side-effect that we
/// couldn't model?
bool keepEvaluatingAfterSideEffect() {
switch (EvalMode) {
case EM_PotentialConstantExpression:
case EM_EvaluateForOverflow:
case EM_IgnoreSideEffects:
return true;
case EM_ConstantExpression:
case EM_ConstantFold:
return false;
}
llvm_unreachable("Missed EvalMode case");
}
/// Note that we have had a side-effect, and determine whether we should
/// keep evaluating.
bool noteSideEffect() {
EvalStatus.HasSideEffects = true;
return keepEvaluatingAfterSideEffect();
}
/// Should we continue evaluation as much as possible after encountering a
/// construct which can't be reduced to a value?
bool keepEvaluatingAfterFailure() {
if (!StepsLeft)
return false;
switch (EvalMode) {
case EM_PotentialConstantExpression:
case EM_EvaluateForOverflow:
return true;
case EM_ConstantExpression:
case EM_ConstantFold:
case EM_IgnoreSideEffects:
return false;
}
llvm_unreachable("Missed EvalMode case");
}
};
/// Object used to treat all foldable expressions as constant expressions.
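/// While enabled, it downgrades EM_ConstantExpression to EM_ConstantFold;
/// on destruction it drops diagnostics newly produced while folding
/// (provided there were none beforehand and no side-effects occurred) and
/// restores the previous mode.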
struct FoldConstant {
EvalInfo &Info;
bool Enabled;
bool HadNoPriorDiags;
EvalInfo::EvaluationMode OldMode;
explicit FoldConstant(EvalInfo &Info, bool Enabled)
: Info(Info),
Enabled(Enabled),
HadNoPriorDiags(Info.EvalStatus.Diag &&
Info.EvalStatus.Diag->empty() &&
!Info.EvalStatus.HasSideEffects),
OldMode(Info.EvalMode) {
if (Enabled && Info.EvalMode == EvalInfo::EM_ConstantExpression)
Info.EvalMode = EvalInfo::EM_ConstantFold;
}
void keepDiagnostics() { Enabled = false; }
~FoldConstant() {
if (Enabled && HadNoPriorDiags && !Info.EvalStatus.Diag->empty() &&
!Info.EvalStatus.HasSideEffects)
Info.EvalStatus.Diag->clear();
Info.EvalMode = OldMode;
}
};
/// RAII object used to suppress diagnostics and side-effects from a
/// speculative evaluation.
class SpeculativeEvaluationRAII {
EvalInfo &Info;
Expr::EvalStatus Old;
public:
SpeculativeEvaluationRAII(EvalInfo &Info,
SmallVectorImpl<PartialDiagnosticAt> *NewDiag = 0)
: Info(Info), Old(Info.EvalStatus) {
Info.EvalStatus.Diag = NewDiag;
// If we're speculatively evaluating, we may have skipped over some
// evaluations and missed out a side effect.
Info.EvalStatus.HasSideEffects = true;
}
~SpeculativeEvaluationRAII() {
Info.EvalStatus = Old;
}
};
/// RAII object wrapping a full-expression or block scope, and handling
/// the ending of the lifetime of temporaries created within it.
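/// For instance, in "const int &r = S().i;" the lifetime-extended temporary
/// must survive the end of the full-expression (it is merely re-ordered on
/// the cleanup stack) but still dies with the enclosing block scope.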
template<bool IsFullExpression>
class ScopeRAII {
EvalInfo &Info;
unsigned OldStackSize;
public:
ScopeRAII(EvalInfo &Info)
: Info(Info), OldStackSize(Info.CleanupStack.size()) {}
~ScopeRAII() {
// Body moved to a static method to encourage the compiler to inline away
// instances of this class.
cleanup(Info, OldStackSize);
}
private:
static void cleanup(EvalInfo &Info, unsigned OldStackSize) {
unsigned NewEnd = OldStackSize;
for (unsigned I = OldStackSize, N = Info.CleanupStack.size();
I != N; ++I) {
if (IsFullExpression && Info.CleanupStack[I].isLifetimeExtended()) {
// Full-expression cleanup of a lifetime-extended temporary: nothing
// to do, just move this cleanup to the right place in the stack.
std::swap(Info.CleanupStack[I], Info.CleanupStack[NewEnd]);
++NewEnd;
} else {
// End the lifetime of the object.
Info.CleanupStack[I].endLifetime();
}
}
Info.CleanupStack.erase(Info.CleanupStack.begin() + NewEnd,
Info.CleanupStack.end());
}
};
typedef ScopeRAII<false> BlockScopeRAII;
typedef ScopeRAII<true> FullExpressionRAII;
}
bool SubobjectDesignator::checkSubobject(EvalInfo &Info, const Expr *E,
CheckSubobjectKind CSK) {
if (Invalid)
return false;
if (isOnePastTheEnd()) {
Info.CCEDiag(E, diag::note_constexpr_past_end_subobject)
<< CSK;
setInvalid();
return false;
}
return true;
}
void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info,
const Expr *E, uint64_t N) {
if (MostDerivedPathLength == Entries.size() && MostDerivedArraySize)
Info.CCEDiag(E, diag::note_constexpr_array_index)
<< static_cast<int>(N) << /*array*/ 0
<< static_cast<unsigned>(MostDerivedArraySize);
else
Info.CCEDiag(E, diag::note_constexpr_array_index)
<< static_cast<int>(N) << /*non-array*/ 1;
setInvalid();
}
CallStackFrame::CallStackFrame(EvalInfo &Info, SourceLocation CallLoc,
const FunctionDecl *Callee, const LValue *This,
APValue *Arguments)
: Info(Info), Caller(Info.CurrentCall), CallLoc(CallLoc), Callee(Callee),
Index(Info.NextCallIndex++), This(This), Arguments(Arguments) {
Info.CurrentCall = this;
++Info.CallStackDepth;
}
CallStackFrame::~CallStackFrame() {
assert(Info.CurrentCall == this && "calls retired out of order");
--Info.CallStackDepth;
Info.CurrentCall = Caller;
}
APValue &CallStackFrame::createTemporary(const void *Key,
bool IsLifetimeExtended) {
APValue &Result = Temporaries[Key];
assert(Result.isUninit() && "temporary created multiple times");
Info.CleanupStack.push_back(Cleanup(&Result, IsLifetimeExtended));
return Result;
}
static void describeCall(CallStackFrame *Frame, raw_ostream &Out);
void EvalInfo::addCallStack(unsigned Limit) {
// Determine which calls to skip, if any.
unsigned ActiveCalls = CallStackDepth - 1;
unsigned SkipStart = ActiveCalls, SkipEnd = SkipStart;
if (Limit && Limit < ActiveCalls) {
SkipStart = Limit / 2 + Limit % 2;
SkipEnd = ActiveCalls - Limit / 2;
}
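// For example, with Limit == 4 and ActiveCalls == 10, SkipStart == 2 and
// SkipEnd == 8: the two innermost and two outermost frames are printed, and
// the six in between are summarized by note_constexpr_calls_suppressed.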
// Walk the call stack and add the diagnostics.
unsigned CallIdx = 0;
for (CallStackFrame *Frame = CurrentCall; Frame != &BottomFrame;
Frame = Frame->Caller, ++CallIdx) {
// Skip this call?
if (CallIdx >= SkipStart && CallIdx < SkipEnd) {
if (CallIdx == SkipStart) {
// Note that we're skipping calls.
addDiag(Frame->CallLoc, diag::note_constexpr_calls_suppressed)
<< unsigned(ActiveCalls - Limit);
}
continue;
}
SmallVector<char, 128> Buffer;
llvm::raw_svector_ostream Out(Buffer);
describeCall(Frame, Out);
addDiag(Frame->CallLoc, diag::note_constexpr_call_here) << Out.str();
}
}
namespace {
struct ComplexValue {
private:
bool IsInt;
public:
APSInt IntReal, IntImag;
APFloat FloatReal, FloatImag;
ComplexValue() : FloatReal(APFloat::Bogus), FloatImag(APFloat::Bogus) {}
void makeComplexFloat() { IsInt = false; }
bool isComplexFloat() const { return !IsInt; }
APFloat &getComplexFloatReal() { return FloatReal; }
APFloat &getComplexFloatImag() { return FloatImag; }
void makeComplexInt() { IsInt = true; }
bool isComplexInt() const { return IsInt; }
APSInt &getComplexIntReal() { return IntReal; }
APSInt &getComplexIntImag() { return IntImag; }
void moveInto(APValue &v) const {
if (isComplexFloat())
v = APValue(FloatReal, FloatImag);
else
v = APValue(IntReal, IntImag);
}
void setFrom(const APValue &v) {
assert(v.isComplexFloat() || v.isComplexInt());
if (v.isComplexFloat()) {
makeComplexFloat();
FloatReal = v.getComplexFloatReal();
FloatImag = v.getComplexFloatImag();
} else {
makeComplexInt();
IntReal = v.getComplexIntReal();
IntImag = v.getComplexIntImag();
}
}
};
struct LValue {
APValue::LValueBase Base;
CharUnits Offset;
unsigned CallIndex;
SubobjectDesignator Designator;
const APValue::LValueBase getLValueBase() const { return Base; }
CharUnits &getLValueOffset() { return Offset; }
const CharUnits &getLValueOffset() const { return Offset; }
unsigned getLValueCallIndex() const { return CallIndex; }
SubobjectDesignator &getLValueDesignator() { return Designator; }
const SubobjectDesignator &getLValueDesignator() const { return Designator;}
void moveInto(APValue &V) const {
if (Designator.Invalid)
V = APValue(Base, Offset, APValue::NoLValuePath(), CallIndex);
else
V = APValue(Base, Offset, Designator.Entries,
Designator.IsOnePastTheEnd, CallIndex);
}
void setFrom(ASTContext &Ctx, const APValue &V) {
assert(V.isLValue());
Base = V.getLValueBase();
Offset = V.getLValueOffset();
CallIndex = V.getLValueCallIndex();
Designator = SubobjectDesignator(Ctx, V);
}
void set(APValue::LValueBase B, unsigned I = 0) {
Base = B;
Offset = CharUnits::Zero();
CallIndex = I;
Designator = SubobjectDesignator(getType(B));
}
// Check that this LValue is not based on a null pointer. If it is, produce
// a diagnostic and mark the designator as invalid.
bool checkNullPointer(EvalInfo &Info, const Expr *E,
CheckSubobjectKind CSK) {
if (Designator.Invalid)
return false;
if (!Base) {
Info.CCEDiag(E, diag::note_constexpr_null_subobject)
<< CSK;
Designator.setInvalid();
return false;
}
return true;
}
// Check that this LValue refers to an object. If not, mark the designator
// invalid and emit a diagnostic.
bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) {
// Outside C++11, do not build a designator referring to a subobject of
// any object: we won't use such a designator for anything.
if (!Info.getLangOpts().CPlusPlus11)
Designator.setInvalid();
return checkNullPointer(Info, E, CSK) &&
Designator.checkSubobject(Info, E, CSK);
}
void addDecl(EvalInfo &Info, const Expr *E,
const Decl *D, bool Virtual = false) {
if (checkSubobject(Info, E, isa<FieldDecl>(D) ? CSK_Field : CSK_Base))
Designator.addDeclUnchecked(D, Virtual);
}
void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) {
if (checkSubobject(Info, E, CSK_ArrayToPointer))
Designator.addArrayUnchecked(CAT);
}
void addComplex(EvalInfo &Info, const Expr *E, QualType EltTy, bool Imag) {
if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real))
Designator.addComplexUnchecked(EltTy, Imag);
}
void adjustIndex(EvalInfo &Info, const Expr *E, uint64_t N) {
if (checkNullPointer(Info, E, CSK_ArrayIndex))
Designator.adjustIndex(Info, E, N);
}
};
struct MemberPtr {
MemberPtr() {}
explicit MemberPtr(const ValueDecl *Decl) :
DeclAndIsDerivedMember(Decl, false), Path() {}
/// The member or (direct or indirect) field referred to by this member
/// pointer, or 0 if this is a null member pointer.
const ValueDecl *getDecl() const {
return DeclAndIsDerivedMember.getPointer();
}
/// Is this actually a member of some type derived from the relevant class?
bool isDerivedMember() const {
return DeclAndIsDerivedMember.getInt();
}
/// Get the class which the declaration actually lives in.
const CXXRecordDecl *getContainingRecord() const {
return cast<CXXRecordDecl>(
DeclAndIsDerivedMember.getPointer()->getDeclContext());
}
void moveInto(APValue &V) const {
V = APValue(getDecl(), isDerivedMember(), Path);
}
void setFrom(const APValue &V) {
assert(V.isMemberPointer());
DeclAndIsDerivedMember.setPointer(V.getMemberPointerDecl());
DeclAndIsDerivedMember.setInt(V.isMemberPointerToDerivedMember());
Path.clear();
ArrayRef<const CXXRecordDecl*> P = V.getMemberPointerPath();
Path.insert(Path.end(), P.begin(), P.end());
}
/// DeclAndIsDerivedMember - The member declaration, and a flag indicating
/// whether the member is a member of some class derived from the class type
/// of the member pointer.
llvm::PointerIntPair<const ValueDecl*, 1, bool> DeclAndIsDerivedMember;
/// Path - The path of base/derived classes from the member declaration's
/// class (exclusive) to the class type of the member pointer (inclusive).
SmallVector<const CXXRecordDecl*, 4> Path;
/// Perform a cast towards the class of the Decl (either up or down the
/// hierarchy).
bool castBack(const CXXRecordDecl *Class) {
assert(!Path.empty());
const CXXRecordDecl *Expected;
if (Path.size() >= 2)
Expected = Path[Path.size() - 2];
else
Expected = getContainingRecord();
if (Expected->getCanonicalDecl() != Class->getCanonicalDecl()) {
// C++11 [expr.static.cast]p12: In a conversion from (D::*) to (B::*),
// if B does not contain the original member and is not a base or
// derived class of the class containing the original member, the result
// of the cast is undefined.
// C++11 [conv.mem]p2 does not cover this case for a cast from (B::*) to
// (D::*). We consider that to be a language defect.
return false;
}
Path.pop_back();
return true;
}
/// Perform a base-to-derived member pointer cast.
bool castToDerived(const CXXRecordDecl *Derived) {
if (!getDecl())
return true;
if (!isDerivedMember()) {
Path.push_back(Derived);
return true;
}
if (!castBack(Derived))
return false;
if (Path.empty())
DeclAndIsDerivedMember.setInt(false);
return true;
}
/// Perform a derived-to-base member pointer cast.
bool castToBase(const CXXRecordDecl *Base) {
if (!getDecl())
return true;
if (Path.empty())
DeclAndIsDerivedMember.setInt(true);
if (isDerivedMember()) {
Path.push_back(Base);
return true;
}
return castBack(Base);
}
};
/// Compare two member pointers, which are assumed to be of the same type.
static bool operator==(const MemberPtr &LHS, const MemberPtr &RHS) {
if (!LHS.getDecl() || !RHS.getDecl())
return !LHS.getDecl() && !RHS.getDecl();
if (LHS.getDecl()->getCanonicalDecl() != RHS.getDecl()->getCanonicalDecl())
return false;
return LHS.Path == RHS.Path;
}
}
static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E);
static bool EvaluateInPlace(APValue &Result, EvalInfo &Info,
const LValue &This, const Expr *E,
bool AllowNonLiteralTypes = false);
static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info);
static bool EvaluatePointer(const Expr *E, LValue &Result, EvalInfo &Info);
static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result,
EvalInfo &Info);
static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info);
static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info);
static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result,
EvalInfo &Info);
static bool EvaluateFloat(const Expr *E, APFloat &Result, EvalInfo &Info);
static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
static bool EvaluateAtomic(const Expr *E, APValue &Result, EvalInfo &Info);
//===----------------------------------------------------------------------===//
// Misc utilities
//===----------------------------------------------------------------------===//
/// Produce a string describing the given constexpr call.
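/// E.g. a frame for a free function call prints as "f(1, 2)"; for an instance
/// member call, the implicit object argument is printed first, as in
/// "x->f(1)".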
static void describeCall(CallStackFrame *Frame, raw_ostream &Out) {
unsigned ArgIndex = 0;
bool IsMemberCall = isa<CXXMethodDecl>(Frame->Callee) &&
!isa<CXXConstructorDecl>(Frame->Callee) &&
cast<CXXMethodDecl>(Frame->Callee)->isInstance();
if (!IsMemberCall)
Out << *Frame->Callee << '(';
if (Frame->This && IsMemberCall) {
APValue Val;
Frame->This->moveInto(Val);
Val.printPretty(Out, Frame->Info.Ctx,
Frame->This->Designator.MostDerivedType);
// FIXME: Add parens around Val if needed.
Out << "->" << *Frame->Callee << '(';
IsMemberCall = false;
}
for (FunctionDecl::param_const_iterator I = Frame->Callee->param_begin(),
E = Frame->Callee->param_end(); I != E; ++I, ++ArgIndex) {
if (ArgIndex > (unsigned)IsMemberCall)
Out << ", ";
const ParmVarDecl *Param = *I;
const APValue &Arg = Frame->Arguments[ArgIndex];
Arg.printPretty(Out, Frame->Info.Ctx, Param->getType());
if (ArgIndex == 0 && IsMemberCall)
Out << "->" << *Frame->Callee << '(';
}
Out << ')';
}
/// Evaluate an expression to see if it had side-effects, and discard its
/// result.
/// \return \c true if the caller should keep evaluating.
static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
APValue Scratch;
if (!Evaluate(Scratch, Info, E))
// We don't need the value, but we might have skipped a side effect here.
return Info.noteSideEffect();
return true;
}
/// Sign- or zero-extend a value to 64 bits. If it's already 64 bits, just
/// return its existing value.
static int64_t getExtValue(const APSInt &Value) {
return Value.isSigned() ? Value.getSExtValue()
: static_cast<int64_t>(Value.getZExtValue());
}
/// Should this call expression be treated as a string literal?
static bool IsStringLiteralCall(const CallExpr *E) {
unsigned Builtin = E->isBuiltinCall();
return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
Builtin == Builtin::BI__builtin___NSStringMakeConstantString);
}
static bool IsGlobalLValue(APValue::LValueBase B) {
// C++11 [expr.const]p3 An address constant expression is a prvalue core
// constant expression of pointer type that evaluates to...
// ... a null pointer value, or a prvalue core constant expression of type
// std::nullptr_t.
if (!B) return true;
if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>()) {
// ... the address of an object with static storage duration,
if (const VarDecl *VD = dyn_cast<VarDecl>(D))
return VD->hasGlobalStorage();
// ... the address of a function,
return isa<FunctionDecl>(D);
}
const Expr *E = B.get<const Expr*>();
switch (E->getStmtClass()) {
default:
return false;
case Expr::CompoundLiteralExprClass: {
const CompoundLiteralExpr *CLE = cast<CompoundLiteralExpr>(E);
return CLE->isFileScope() && CLE->isLValue();
}
case Expr::MaterializeTemporaryExprClass:
// A materialized temporary might have been lifetime-extended to static
// storage duration.
return cast<MaterializeTemporaryExpr>(E)->getStorageDuration() == SD_Static;
// A string literal has static storage duration.
case Expr::StringLiteralClass:
case Expr::PredefinedExprClass:
case Expr::ObjCStringLiteralClass:
case Expr::ObjCEncodeExprClass:
case Expr::CXXTypeidExprClass:
case Expr::CXXUuidofExprClass:
return true;
case Expr::CallExprClass:
return IsStringLiteralCall(cast<CallExpr>(E));
// For GCC compatibility, &&label has static storage duration.
case Expr::AddrLabelExprClass:
return true;
// A Block literal expression may be used as the initialization value for
// Block variables at global or local static scope.
case Expr::BlockExprClass:
return !cast<BlockExpr>(E)->getBlockDecl()->hasCaptures();
case Expr::ImplicitValueInitExprClass:
// FIXME:
// We can never form an lvalue with an implicit value initialization as its
// base through expression evaluation, so these only appear in one case: the
// implicit variable declaration we invent when checking whether a constexpr
// constructor can produce a constant expression. We must assume that such
// an expression might be a global lvalue.
return true;
}
}
static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) {
assert(Base && "no location for a null lvalue");
const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>();
if (VD)
Info.Note(VD->getLocation(), diag::note_declared_at);
else
Info.Note(Base.get<const Expr*>()->getExprLoc(),
diag::note_constexpr_temporary_here);
}
/// Check that this reference or pointer core constant expression is a valid
/// value for an address or reference constant expression. Return true if we
/// can fold this expression, whether or not it's a constant expression.
static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc,
QualType Type, const LValue &LVal) {
bool IsReferenceType = Type->isReferenceType();
APValue::LValueBase Base = LVal.getLValueBase();
const SubobjectDesignator &Designator = LVal.getLValueDesignator();
// Check that the object is a global. Note that the fake 'this' object we
// manufacture when checking potential constant expressions is conservatively
// assumed to be global here.
if (!IsGlobalLValue(Base)) {
if (Info.getLangOpts().CPlusPlus11) {
const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>();
Info.Diag(Loc, diag::note_constexpr_non_global, 1)
<< IsReferenceType << !Designator.Entries.empty()
<< !!VD << VD;
NoteLValueLocation(Info, Base);
} else {
Info.Diag(Loc);
}
// Don't allow references to temporaries to escape.
return false;
}
assert((Info.checkingPotentialConstantExpression() ||
LVal.getLValueCallIndex() == 0) &&
"have call index for global lvalue");
// Check if this is a thread-local variable.
if (const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>()) {
if (const VarDecl *Var = dyn_cast<const VarDecl>(VD)) {
if (Var->getTLSKind())
return false;
}
}
// Allow address constant expressions to be past-the-end pointers. This is
// an extension: the standard requires them to point to an object.
if (!IsReferenceType)
return true;
// A reference constant expression must refer to an object.
if (!Base) {
// FIXME: diagnostic
Info.CCEDiag(Loc);
return true;
}
// Does this refer to one past the end of some object?
if (Designator.isOnePastTheEnd()) {
const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>();
Info.Diag(Loc, diag::note_constexpr_past_end, 1)
<< !Designator.Entries.empty() << !!VD << VD;
NoteLValueLocation(Info, Base);
}
return true;
}
/// Check that this core constant expression is of literal type, and if not,
/// produce an appropriate diagnostic.
static bool CheckLiteralType(EvalInfo &Info, const Expr *E,
const LValue *This = 0) {
if (!E->isRValue() || E->getType()->isLiteralType(Info.Ctx))
return true;
// C++1y: A constant initializer for an object o [...] may also invoke
// constexpr constructors for o and its subobjects even if those objects
// are of non-literal class types.
if (Info.getLangOpts().CPlusPlus1y && This &&
Info.EvaluatingDecl == This->getLValueBase())
return true;
// Prvalue constant expressions must be of literal types.
if (Info.getLangOpts().CPlusPlus11)
Info.Diag(E, diag::note_constexpr_nonliteral)
<< E->getType();
else
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
/// Check that this core constant expression value is a valid value for a
/// constant expression. If not, report an appropriate diagnostic. Does not
/// check that the expression is of literal type.
static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc,
QualType Type, const APValue &Value) {
if (Value.isUninit()) {
Info.Diag(DiagLoc, diag::note_constexpr_uninitialized)
<< true << Type;
return false;
}
// Core issue 1454: For a literal constant expression of array or class type,
// each subobject of its value shall have been initialized by a constant
// expression.
if (Value.isArray()) {
QualType EltTy = Type->castAsArrayTypeUnsafe()->getElementType();
for (unsigned I = 0, N = Value.getArrayInitializedElts(); I != N; ++I) {
if (!CheckConstantExpression(Info, DiagLoc, EltTy,
Value.getArrayInitializedElt(I)))
return false;
}
if (!Value.hasArrayFiller())
return true;
return CheckConstantExpression(Info, DiagLoc, EltTy,
Value.getArrayFiller());
}
if (Value.isUnion() && Value.getUnionField()) {
return CheckConstantExpression(Info, DiagLoc,
Value.getUnionField()->getType(),
Value.getUnionValue());
}
if (Value.isStruct()) {
RecordDecl *RD = Type->castAs<RecordType>()->getDecl();
if (const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD)) {
unsigned BaseIndex = 0;
for (CXXRecordDecl::base_class_const_iterator I = CD->bases_begin(),
End = CD->bases_end(); I != End; ++I, ++BaseIndex) {
if (!CheckConstantExpression(Info, DiagLoc, I->getType(),
Value.getStructBase(BaseIndex)))
return false;
}
}
for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end();
I != E; ++I) {
if (!CheckConstantExpression(Info, DiagLoc, I->getType(),
Value.getStructField(I->getFieldIndex())))
return false;
}
}
if (Value.isLValue()) {
LValue LVal;
LVal.setFrom(Info.Ctx, Value);
return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal);
}
// Everything else is fine.
return true;
}
const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
return LVal.Base.dyn_cast<const ValueDecl*>();
}
static bool IsLiteralLValue(const LValue &Value) {
if (Value.CallIndex)
return false;
const Expr *E = Value.Base.dyn_cast<const Expr*>();
return E && !isa<MaterializeTemporaryExpr>(E);
}
static bool IsWeakLValue(const LValue &Value) {
const ValueDecl *Decl = GetLValueBaseDecl(Value);
return Decl && Decl->isWeak();
}
static bool EvalPointerValueAsBool(const APValue &Value, bool &Result) {
// A null base expression indicates a null pointer. These are always
// evaluatable, and they are false unless the offset is nonzero.
if (!Value.getLValueBase()) {
Result = !Value.getLValueOffset().isZero();
return true;
}
// We have a non-null base. These are generally known to be true, but if it's
// a weak declaration it can be null at runtime.
Result = true;
const ValueDecl *Decl = Value.getLValueBase().dyn_cast<const ValueDecl*>();
return !Decl || !Decl->isWeak();
}
static bool HandleConversionToBool(const APValue &Val, bool &Result) {
switch (Val.getKind()) {
case APValue::Uninitialized:
return false;
case APValue::Int:
Result = Val.getInt().getBoolValue();
return true;
case APValue::Float:
Result = !Val.getFloat().isZero();
return true;
case APValue::ComplexInt:
Result = Val.getComplexIntReal().getBoolValue() ||
Val.getComplexIntImag().getBoolValue();
return true;
case APValue::ComplexFloat:
Result = !Val.getComplexFloatReal().isZero() ||
!Val.getComplexFloatImag().isZero();
return true;
case APValue::LValue:
return EvalPointerValueAsBool(Val, Result);
case APValue::MemberPointer:
Result = Val.getMemberPointerDecl();
return true;
case APValue::Vector:
case APValue::Array:
case APValue::Struct:
case APValue::Union:
case APValue::AddrLabelDiff:
return false;
}
llvm_unreachable("unknown APValue kind");
}
static bool EvaluateAsBooleanCondition(const Expr *E, bool &Result,
EvalInfo &Info) {
assert(E->isRValue() && "missing lvalue-to-rvalue conv in bool condition");
APValue Val;
if (!Evaluate(Val, Info, E))
return false;
return HandleConversionToBool(Val, Result);
}
template<typename T>
static void HandleOverflow(EvalInfo &Info, const Expr *E,
const T &SrcValue, QualType DestType) {
Info.CCEDiag(E, diag::note_constexpr_overflow)
<< SrcValue << DestType;
}
static bool HandleFloatToIntCast(EvalInfo &Info, const Expr *E,
QualType SrcType, const APFloat &Value,
QualType DestType, APSInt &Result) {
unsigned DestWidth = Info.Ctx.getIntWidth(DestType);
// Determine whether we are converting to unsigned or signed.
bool DestSigned = DestType->isSignedIntegerOrEnumerationType();
Result = APSInt(DestWidth, !DestSigned);
bool ignored;
if (Value.convertToInteger(Result, llvm::APFloat::rmTowardZero, &ignored)
& APFloat::opInvalidOp)
HandleOverflow(Info, E, Value, DestType);
return true;
}
static bool HandleFloatToFloatCast(EvalInfo &Info, const Expr *E,
QualType SrcType, QualType DestType,
APFloat &Result) {
APFloat Value = Result;
bool ignored;
if (Result.convert(Info.Ctx.getFloatTypeSemantics(DestType),
APFloat::rmNearestTiesToEven, &ignored)
& APFloat::opOverflow)
HandleOverflow(Info, E, Value, DestType);
return true;
}
static APSInt HandleIntToIntCast(EvalInfo &Info, const Expr *E,
QualType DestType, QualType SrcType,
APSInt &Value) {
unsigned DestWidth = Info.Ctx.getIntWidth(DestType);
APSInt Result = Value;
// Figure out if this is a truncate, extend or noop cast.
// If the input is signed, do a sign extend, noop, or truncate.
Result = Result.extOrTrunc(DestWidth);
Result.setIsUnsigned(DestType->isUnsignedIntegerOrEnumerationType());
return Result;
}
static bool HandleIntToFloatCast(EvalInfo &Info, const Expr *E,
QualType SrcType, const APSInt &Value,
QualType DestType, APFloat &Result) {
Result = APFloat(Info.Ctx.getFloatTypeSemantics(DestType), 1);
if (Result.convertFromAPInt(Value, Value.isSigned(),
APFloat::rmNearestTiesToEven)
& APFloat::opOverflow)
HandleOverflow(Info, E, Value, DestType);
return true;
}
static bool truncateBitfieldValue(EvalInfo &Info, const Expr *E,
APValue &Value, const FieldDecl *FD) {
assert(FD->isBitField() && "truncateBitfieldValue on non-bitfield");
if (!Value.isInt()) {
// Trying to store a pointer-cast-to-integer into a bitfield.
// FIXME: In this case, we should provide the diagnostic for casting
// a pointer to an integer.
assert(Value.isLValue() && "integral value neither int nor lvalue?");
Info.Diag(E);
return false;
}
APSInt &Int = Value.getInt();
unsigned OldBitWidth = Int.getBitWidth();
unsigned NewBitWidth = FD->getBitWidthValue(Info.Ctx);
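// E.g. storing 5 into a signed 3-bit bit-field truncates to 0b101, which
// sign-extends back to -3 in the original width.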
if (NewBitWidth < OldBitWidth)
Int = Int.trunc(NewBitWidth).extend(OldBitWidth);
return true;
}
static bool EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E,
llvm::APInt &Res) {
APValue SVal;
if (!Evaluate(SVal, Info, E))
return false;
if (SVal.isInt()) {
Res = SVal.getInt();
return true;
}
if (SVal.isFloat()) {
Res = SVal.getFloat().bitcastToAPInt();
return true;
}
if (SVal.isVector()) {
QualType VecTy = E->getType();
unsigned VecSize = Info.Ctx.getTypeSize(VecTy);
QualType EltTy = VecTy->castAs<VectorType>()->getElementType();
unsigned EltSize = Info.Ctx.getTypeSize(EltTy);
bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
Res = llvm::APInt::getNullValue(VecSize);
for (unsigned i = 0; i < SVal.getVectorLength(); i++) {
APValue &Elt = SVal.getVectorElt(i);
llvm::APInt EltAsInt;
if (Elt.isInt()) {
EltAsInt = Elt.getInt();
} else if (Elt.isFloat()) {
EltAsInt = Elt.getFloat().bitcastToAPInt();
} else {
// Don't try to handle vectors of anything other than int or float
// (not sure if it's possible to hit this case).
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
unsigned BaseEltSize = EltAsInt.getBitWidth();
if (BigEndian)
Res |= EltAsInt.zextOrTrunc(VecSize).rotr(i*EltSize+BaseEltSize);
else
Res |= EltAsInt.zextOrTrunc(VecSize).rotl(i*EltSize);
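// E.g. a v4i8 vector {0x11,0x22,0x33,0x44} packs to 0x44332211 on a
// little-endian target and to 0x11223344 on a big-endian one.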
}
return true;
}
// Give up if the input isn't an int, float, or vector. For example, we
// reject "(v4i16)(intptr_t)&a".
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
/// Perform the given integer operation, which is known to need at most BitWidth
/// bits, and check for overflow in the original type (if that type was not an
/// unsigned type).
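/// For example, adding two 32-bit signed ints uses 33 bits: INT_MAX + 1
/// becomes 2^31, whose 32-bit truncation (INT_MIN) no longer re-extends to
/// the same value, so the overflow is diagnosed and the wrapped result
/// returned.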
template<typename Operation>
static APSInt CheckedIntArithmetic(EvalInfo &Info, const Expr *E,
const APSInt &LHS, const APSInt &RHS,
unsigned BitWidth, Operation Op) {
if (LHS.isUnsigned())
return Op(LHS, RHS);
APSInt Value(Op(LHS.extend(BitWidth), RHS.extend(BitWidth)), false);
APSInt Result = Value.trunc(LHS.getBitWidth());
if (Result.extend(BitWidth) != Value) {
if (Info.checkingForOverflow())
Info.Ctx.getDiagnostics().Report(E->getExprLoc(),
diag::warn_integer_constant_overflow)
<< Result.toString(10) << E->getType();
else
HandleOverflow(Info, E, Value, E->getType());
}
return Result;
}
/// Perform the given binary integer operation.
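/// Shifts still fold out-of-range counts; e.g. under OpenCL the count is
/// masked to the LHS bit width, so 1 << 33 on a 32-bit LHS folds as 1 << 1,
/// while elsewhere an over-wide count is diagnosed via
/// note_constexpr_large_shift.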
static bool handleIntIntBinOp(EvalInfo &Info, const Expr *E, const APSInt &LHS,
BinaryOperatorKind Opcode, APSInt RHS,
APSInt &Result) {
switch (Opcode) {
default:
Info.Diag(E);
return false;
case BO_Mul:
Result = CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() * 2,
std::multiplies<APSInt>());
return true;
case BO_Add:
Result = CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() + 1,
std::plus<APSInt>());
return true;
case BO_Sub:
Result = CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() + 1,
std::minus<APSInt>());
return true;
case BO_And: Result = LHS & RHS; return true;
case BO_Xor: Result = LHS ^ RHS; return true;
case BO_Or: Result = LHS | RHS; return true;
case BO_Div:
case BO_Rem:
if (RHS == 0) {
Info.Diag(E, diag::note_expr_divide_by_zero);
return false;
}
// Check for overflow case: INT_MIN / -1 or INT_MIN % -1.
if (RHS.isNegative() && RHS.isAllOnesValue() &&
LHS.isSigned() && LHS.isMinSignedValue())
HandleOverflow(Info, E, -LHS.extend(LHS.getBitWidth() + 1), E->getType());
Result = (Opcode == BO_Rem ? LHS % RHS : LHS / RHS);
return true;
case BO_Shl: {
if (Info.getLangOpts().OpenCL)
// OpenCL 6.3j: shift values are effectively % word size of LHS.
RHS &= APSInt(llvm::APInt(RHS.getBitWidth(),
static_cast<uint64_t>(LHS.getBitWidth() - 1)),
RHS.isUnsigned());
else if (RHS.isSigned() && RHS.isNegative()) {
// During constant-folding, a negative shift is an opposite shift. Such
// a shift is not a constant expression.
Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS;
RHS = -RHS;
goto shift_right;
}
shift_left:
// C++11 [expr.shift]p1: Shift width must be less than the bit width of
// the shifted type.
unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1);
if (SA != RHS) {
Info.CCEDiag(E, diag::note_constexpr_large_shift)
<< RHS << E->getType() << LHS.getBitWidth();
} else if (LHS.isSigned()) {
// C++11 [expr.shift]p2: A signed left shift must have a non-negative
// operand, and must not overflow the corresponding unsigned type.
if (LHS.isNegative())
Info.CCEDiag(E, diag::note_constexpr_lshift_of_negative) << LHS;
else if (LHS.countLeadingZeros() < SA)
Info.CCEDiag(E, diag::note_constexpr_lshift_discards);
}
Result = LHS << SA;
return true;
}
case BO_Shr: {
if (Info.getLangOpts().OpenCL)
// OpenCL 6.3j: shift values are effectively % word size of LHS.
RHS &= APSInt(llvm::APInt(RHS.getBitWidth(),
static_cast<uint64_t>(LHS.getBitWidth() - 1)),
RHS.isUnsigned());
else if (RHS.isSigned() && RHS.isNegative()) {
// During constant-folding, a negative shift is an opposite shift. Such a
// shift is not a constant expression.
Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS;
RHS = -RHS;
goto shift_left;
}
shift_right:
// C++11 [expr.shift]p1: Shift width must be less than the bit width of the
// shifted type.
unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1);
if (SA != RHS)
Info.CCEDiag(E, diag::note_constexpr_large_shift)
<< RHS << E->getType() << LHS.getBitWidth();
Result = LHS >> SA;
return true;
}
case BO_LT: Result = LHS < RHS; return true;
case BO_GT: Result = LHS > RHS; return true;
case BO_LE: Result = LHS <= RHS; return true;
case BO_GE: Result = LHS >= RHS; return true;
case BO_EQ: Result = LHS == RHS; return true;
case BO_NE: Result = LHS != RHS; return true;
}
}
/// Perform the given binary floating-point operation, in-place, on LHS.
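/// E.g. folding 1.0 / 0.0 yields +inf and emits
/// note_constexpr_float_arithmetic, since such a result is not permitted in a
/// constant expression, but folding continues.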
static bool handleFloatFloatBinOp(EvalInfo &Info, const Expr *E,
APFloat &LHS, BinaryOperatorKind Opcode,
const APFloat &RHS) {
switch (Opcode) {
default:
Info.Diag(E);
return false;
case BO_Mul:
LHS.multiply(RHS, APFloat::rmNearestTiesToEven);
break;
case BO_Add:
LHS.add(RHS, APFloat::rmNearestTiesToEven);
break;
case BO_Sub:
LHS.subtract(RHS, APFloat::rmNearestTiesToEven);
break;
case BO_Div:
LHS.divide(RHS, APFloat::rmNearestTiesToEven);
break;
}
if (LHS.isInfinity() || LHS.isNaN())
Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << LHS.isNaN();
return true;
}
/// Cast an lvalue referring to a base subobject to a derived class, by
/// truncating the lvalue's path to the given length.
static bool CastToDerivedClass(EvalInfo &Info, const Expr *E, LValue &Result,
const RecordDecl *TruncatedType,
unsigned TruncatedElements) {
SubobjectDesignator &D = Result.Designator;
// Check we actually point to a derived class object.
if (TruncatedElements == D.Entries.size())
return true;
assert(TruncatedElements >= D.MostDerivedPathLength &&
"not casting to a derived class");
if (!Result.checkSubobject(Info, E, CSK_Derived))
return false;
// Truncate the path to the subobject, and remove any derived-to-base offsets.
const RecordDecl *RD = TruncatedType;
for (unsigned I = TruncatedElements, N = D.Entries.size(); I != N; ++I) {
if (RD->isInvalidDecl()) return false;
const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD);
const CXXRecordDecl *Base = getAsBaseClass(D.Entries[I]);
if (isVirtualBaseClass(D.Entries[I]))
Result.Offset -= Layout.getVBaseClassOffset(Base);
else
Result.Offset -= Layout.getBaseClassOffset(Base);
RD = Base;
}
D.Entries.resize(TruncatedElements);
return true;
}
static bool HandleLValueDirectBase(EvalInfo &Info, const Expr *E, LValue &Obj,
const CXXRecordDecl *Derived,
const CXXRecordDecl *Base,
const ASTRecordLayout *RL = 0) {
if (!RL) {
if (Derived->isInvalidDecl()) return false;
RL = &Info.Ctx.getASTRecordLayout(Derived);
}
Obj.getLValueOffset() += RL->getBaseClassOffset(Base);
Obj.addDecl(Info, E, Base, /*Virtual*/ false);
return true;
}
static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj,
const CXXRecordDecl *DerivedDecl,
const CXXBaseSpecifier *Base) {
const CXXRecordDecl *BaseDecl = Base->getType()->getAsCXXRecordDecl();
if (!Base->isVirtual())
return HandleLValueDirectBase(Info, E, Obj, DerivedDecl, BaseDecl);
SubobjectDesignator &D = Obj.Designator;
if (D.Invalid)
return false;
// Extract most-derived object and corresponding type.
DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl();
if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength))
return false;
// Find the virtual base class.
if (DerivedDecl->isInvalidDecl()) return false;
const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(DerivedDecl);
Obj.getLValueOffset() += Layout.getVBaseClassOffset(BaseDecl);
Obj.addDecl(Info, E, BaseDecl, /*Virtual*/ true);
return true;
}
static bool HandleLValueBasePath(EvalInfo &Info, const CastExpr *E,
QualType Type, LValue &Result) {
for (CastExpr::path_const_iterator PathI = E->path_begin(),
PathE = E->path_end();
PathI != PathE; ++PathI) {
if (!HandleLValueBase(Info, E, Result, Type->getAsCXXRecordDecl(),
*PathI))
return false;
Type = (*PathI)->getType();
}
return true;
}
/// Update LVal to refer to the given field, which must be a member of the type
/// currently described by LVal.
static bool HandleLValueMember(EvalInfo &Info, const Expr *E, LValue &LVal,
const FieldDecl *FD,
const ASTRecordLayout *RL = 0) {
if (!RL) {
if (FD->getParent()->isInvalidDecl()) return false;
RL = &Info.Ctx.getASTRecordLayout(FD->getParent());
}
unsigned I = FD->getFieldIndex();
LVal.Offset += Info.Ctx.toCharUnitsFromBits(RL->getFieldOffset(I));
LVal.addDecl(Info, E, FD);
return true;
}
/// Update LVal to refer to the given indirect field.
static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E,
LValue &LVal,
const IndirectFieldDecl *IFD) {
for (IndirectFieldDecl::chain_iterator C = IFD->chain_begin(),
CE = IFD->chain_end(); C != CE; ++C)
if (!HandleLValueMember(Info, E, LVal, cast<FieldDecl>(*C)))
return false;
return true;
}
/// Get the size of the given type in char units.
static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
QualType Type, CharUnits &Size) {
// sizeof(void), __alignof__(void), sizeof(function) = 1 as a gcc
// extension.
if (Type->isVoidType() || Type->isFunctionType()) {
Size = CharUnits::One();
return true;
}
if (!Type->isConstantSizeType()) {
// sizeof(vla) is not a constant expression: C99 6.5.3.4p2.
// FIXME: Better diagnostic.
Info.Diag(Loc);
return false;
}
Size = Info.Ctx.getTypeSizeInChars(Type);
return true;
}
/// Update a pointer value to model pointer arithmetic.
/// \param Info - Information about the ongoing evaluation.
/// \param E - The expression being evaluated, for diagnostic purposes.
/// \param LVal - The pointer value to be updated.
/// \param EltTy - The pointee type represented by LVal.
/// \param Adjustment - The adjustment, in objects of type EltTy, to add.
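/// For example (assuming a 4-byte int), adjusting an int* by +3 adds 12 chars
/// to the offset and advances the designator's array index by 3.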
static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E,
LValue &LVal, QualType EltTy,
int64_t Adjustment) {
CharUnits SizeOfPointee;
if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfPointee))
return false;
// Compute the new offset in the appropriate width.
LVal.Offset += Adjustment * SizeOfPointee;
LVal.adjustIndex(Info, E, Adjustment);
return true;
}
/// Update an lvalue to refer to a component of a complex number.
/// \param Info - Information about the ongoing evaluation.
/// \param LVal - The lvalue to be updated.
/// \param EltTy - The complex number's component type.
/// \param Imag - False for the real component, true for the imaginary.
static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E,
LValue &LVal, QualType EltTy,
bool Imag) {
if (Imag) {
CharUnits SizeOfComponent;
if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfComponent))
return false;
LVal.Offset += SizeOfComponent;
}
LVal.addComplex(Info, E, EltTy, Imag);
return true;
}
/// Try to evaluate the initializer for a variable declaration.
///
/// \param Info Information about the ongoing evaluation.
/// \param E An expression to be used when printing diagnostics.
/// \param VD The variable whose initializer should be obtained.
/// \param Frame The frame in which the variable was created. Must be null
/// if this variable is not local to the evaluation.
/// \param Result Filled in with a pointer to the value of the variable.
static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
const VarDecl *VD, CallStackFrame *Frame,
APValue *&Result) {
// If this is a parameter to an active constexpr function call, perform
// argument substitution.
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(VD)) {
// Assume arguments of a potential constant expression are unknown
// constant expressions.
if (Info.checkingPotentialConstantExpression())
return false;
if (!Frame || !Frame->Arguments) {
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
Result = &Frame->Arguments[PVD->getFunctionScopeIndex()];
return true;
}
// If this is a local variable, dig out its value.
if (Frame) {
Result = Frame->getTemporary(VD);
assert(Result && "missing value for local variable");
return true;
}
// Dig out the initializer, and use the declaration it's attached to.
const Expr *Init = VD->getAnyInitializer(VD);
if (!Init || Init->isValueDependent()) {
// If we're checking a potential constant expression, the variable could be
// initialized later.
if (!Info.checkingPotentialConstantExpression())
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
// If we're currently evaluating the initializer of this declaration, use that
// in-flight value.
if (Info.EvaluatingDecl.dyn_cast<const ValueDecl*>() == VD) {
Result = Info.EvaluatingDeclValue;
return true;
}
// Never evaluate the initializer of a weak variable. We can't be sure that
// this is the definition which will be used.
if (VD->isWeak()) {
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
// Check that we can fold the initializer. In C++, we will have already done
// this in the cases where it matters for conformance.
SmallVector<PartialDiagnosticAt, 8> Notes;
if (!VD->evaluateValue(Notes)) {
Info.Diag(E, diag::note_constexpr_var_init_non_constant,
Notes.size() + 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
Info.addNotes(Notes);
return false;
} else if (!VD->checkInitIsICE()) {
Info.CCEDiag(E, diag::note_constexpr_var_init_non_constant,
Notes.size() + 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
Info.addNotes(Notes);
}
Result = VD->getEvaluatedValue();
return true;
}
static bool IsConstNonVolatile(QualType T) {
Qualifiers Quals = T.getQualifiers();
return Quals.hasConst() && !Quals.hasVolatile();
}
/// Get the base index of the given base class within an APValue representing
/// the given derived class.
static unsigned getBaseIndex(const CXXRecordDecl *Derived,
const CXXRecordDecl *Base) {
Base = Base->getCanonicalDecl();
unsigned Index = 0;
for (CXXRecordDecl::base_class_const_iterator I = Derived->bases_begin(),
E = Derived->bases_end(); I != E; ++I, ++Index) {
if (I->getType()->getAsCXXRecordDecl()->getCanonicalDecl() == Base)
return Index;
}
llvm_unreachable("base class missing from derived class's bases list");
}
/// Extract the value of a character from a string literal.
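/// An index at or beyond getLength() yields the implicit terminating zero;
/// e.g. index 2 of "hi" (type char[3]) extracts 0.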
static APSInt extractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit,
uint64_t Index) {
// FIXME: Support PredefinedExpr, ObjCEncodeExpr, MakeStringConstant
const StringLiteral *S = cast<StringLiteral>(Lit);
const ConstantArrayType *CAT =
Info.Ctx.getAsConstantArrayType(S->getType());
assert(CAT && "string literal isn't an array");
QualType CharType = CAT->getElementType();
assert(CharType->isIntegerType() && "unexpected character type");
APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(),
CharType->isUnsignedIntegerType());
if (Index < S->getLength())
Value = S->getCodeUnit(Index);
return Value;
}
// Expand a string literal into an array of characters.
static void expandStringLiteral(EvalInfo &Info, const Expr *Lit,
APValue &Result) {
const StringLiteral *S = cast<StringLiteral>(Lit);
const ConstantArrayType *CAT =
Info.Ctx.getAsConstantArrayType(S->getType());
assert(CAT && "string literal isn't an array");
QualType CharType = CAT->getElementType();
assert(CharType->isIntegerType() && "unexpected character type");
unsigned Elts = CAT->getSize().getZExtValue();
Result = APValue(APValue::UninitArray(),
std::min(S->getLength(), Elts), Elts);
APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(),
CharType->isUnsignedIntegerType());
if (Result.hasArrayFiller())
Result.getArrayFiller() = APValue(Value);
for (unsigned I = 0, N = Result.getArrayInitializedElts(); I != N; ++I) {
Value = S->getCodeUnit(I);
Result.getArrayInitializedElt(I) = APValue(Value);
}
}
// Expand an array so that it has more than Index filled elements.
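// E.g. with Size == 100, OldElts == 0 and Index == 2, NewElts is raised to
// the floor of 8, and the new slots are copied from the array filler.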
static void expandArray(APValue &Array, unsigned Index) {
unsigned Size = Array.getArraySize();
assert(Index < Size);
// Always at least double the number of elements for which we store a value.
unsigned OldElts = Array.getArrayInitializedElts();
unsigned NewElts = std::max(Index+1, OldElts * 2);
NewElts = std::min(Size, std::max(NewElts, 8u));
// Copy the data across.
APValue NewValue(APValue::UninitArray(), NewElts, Size);
for (unsigned I = 0; I != OldElts; ++I)
NewValue.getArrayInitializedElt(I).swap(Array.getArrayInitializedElt(I));
for (unsigned I = OldElts; I != NewElts; ++I)
NewValue.getArrayInitializedElt(I) = Array.getArrayFiller();
if (NewValue.hasArrayFiller())
NewValue.getArrayFiller() = Array.getArrayFiller();
Array.swap(NewValue);
}
/// Kinds of access we can perform on an object, for diagnostics.
enum AccessKinds {
AK_Read,
AK_Assign,
AK_Increment,
AK_Decrement
};
/// A handle to a complete object (an object that is not a subobject of
/// another object).
struct CompleteObject {
/// The value of the complete object.
APValue *Value;
/// The type of the complete object.
QualType Type;
CompleteObject() : Value(0) {}
CompleteObject(APValue *Value, QualType Type)
: Value(Value), Type(Type) {
assert(Value && "missing value for complete object");
}
LLVM_EXPLICIT operator bool() const { return Value; }
};
/// Find the designated sub-object of an rvalue.
template<typename SubobjectHandler>
typename SubobjectHandler::result_type
findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
const SubobjectDesignator &Sub, SubobjectHandler &handler) {
if (Sub.Invalid)
// A diagnostic will have already been produced.
return handler.failed();
if (Sub.isOnePastTheEnd()) {
if (Info.getLangOpts().CPlusPlus11)
Info.Diag(E, diag::note_constexpr_access_past_end)
<< handler.AccessKind;
else
Info.Diag(E);
return handler.failed();
}
APValue *O = Obj.Value;
QualType ObjType = Obj.Type;
const FieldDecl *LastField = 0;
// Walk the designator's path to find the subobject.
for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) {
if (O->isUninit()) {
if (!Info.checkingPotentialConstantExpression())
Info.Diag(E, diag::note_constexpr_access_uninit) << handler.AccessKind;
return handler.failed();
}
if (I == N) {
if (!handler.found(*O, ObjType))
return false;
// If we modified a bit-field, truncate it to the right width.
if (handler.AccessKind != AK_Read &&
LastField && LastField->isBitField() &&
!truncateBitfieldValue(Info, E, *O, LastField))
return false;
return true;
}
LastField = 0;
if (ObjType->isArrayType()) {
// Next subobject is an array element.
const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(ObjType);
assert(CAT && "vla in literal type?");
uint64_t Index = Sub.Entries[I].ArrayIndex;
if (CAT->getSize().ule(Index)) {
// Note, it should not be possible to form a pointer with a valid
// designator which points more than one past the end of the array.
if (Info.getLangOpts().CPlusPlus11)
Info.Diag(E, diag::note_constexpr_access_past_end)
<< handler.AccessKind;
else
Info.Diag(E);
return handler.failed();
}
ObjType = CAT->getElementType();
// An array object is represented as either an Array APValue or as an
// LValue which refers to a string literal.
if (O->isLValue()) {
assert(I == N - 1 && "extracting subobject of character?");
assert(!O->hasLValuePath() || O->getLValuePath().empty());
if (handler.AccessKind != AK_Read)
expandStringLiteral(Info, O->getLValueBase().get<const Expr *>(),
*O);
else
return handler.foundString(*O, ObjType, Index);
}
if (O->getArrayInitializedElts() > Index)
O = &O->getArrayInitializedElt(Index);
else if (handler.AccessKind != AK_Read) {
expandArray(*O, Index);
O = &O->getArrayInitializedElt(Index);
} else
O = &O->getArrayFiller();
} else if (ObjType->isAnyComplexType()) {
// Next subobject is a complex number.
uint64_t Index = Sub.Entries[I].ArrayIndex;
if (Index > 1) {
if (Info.getLangOpts().CPlusPlus11)
Info.Diag(E, diag::note_constexpr_access_past_end)
<< handler.AccessKind;
else
Info.Diag(E);
return handler.failed();
}
bool WasConstQualified = ObjType.isConstQualified();
ObjType = ObjType->castAs<ComplexType>()->getElementType();
if (WasConstQualified)
ObjType.addConst();
assert(I == N - 1 && "extracting subobject of scalar?");
if (O->isComplexInt()) {
return handler.found(Index ? O->getComplexIntImag()
: O->getComplexIntReal(), ObjType);
} else {
assert(O->isComplexFloat());
return handler.found(Index ? O->getComplexFloatImag()
: O->getComplexFloatReal(), ObjType);
}
} else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) {
if (Field->isMutable() && handler.AccessKind == AK_Read) {
Info.Diag(E, diag::note_constexpr_ltor_mutable, 1)
<< Field;
Info.Note(Field->getLocation(), diag::note_declared_at);
return handler.failed();
}
// Next subobject is a class, struct or union field.
RecordDecl *RD = ObjType->castAs<RecordType>()->getDecl();
if (RD->isUnion()) {
const FieldDecl *UnionField = O->getUnionField();
if (!UnionField ||
UnionField->getCanonicalDecl() != Field->getCanonicalDecl()) {
Info.Diag(E, diag::note_constexpr_access_inactive_union_member)
<< handler.AccessKind << Field << !UnionField << UnionField;
return handler.failed();
}
O = &O->getUnionValue();
} else
O = &O->getStructField(Field->getFieldIndex());
bool WasConstQualified = ObjType.isConstQualified();
ObjType = Field->getType();
if (WasConstQualified && !Field->isMutable())
ObjType.addConst();
if (ObjType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus) {
// FIXME: Include a description of the path to the volatile subobject.
Info.Diag(E, diag::note_constexpr_access_volatile_obj, 1)
<< handler.AccessKind << 2 << Field;
Info.Note(Field->getLocation(), diag::note_declared_at);
} else {
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
}
return handler.failed();
}
LastField = Field;
} else {
// Next subobject is a base class.
const CXXRecordDecl *Derived = ObjType->getAsCXXRecordDecl();
const CXXRecordDecl *Base = getAsBaseClass(Sub.Entries[I]);
O = &O->getStructBase(getBaseIndex(Derived, Base));
bool WasConstQualified = ObjType.isConstQualified();
ObjType = Info.Ctx.getRecordType(Base);
if (WasConstQualified)
ObjType.addConst();
}
}
}
namespace {
struct ExtractSubobjectHandler {
EvalInfo &Info;
APValue &Result;
static const AccessKinds AccessKind = AK_Read;
typedef bool result_type;
bool failed() { return false; }
bool found(APValue &Subobj, QualType SubobjType) {
Result = Subobj;
return true;
}
bool found(APSInt &Value, QualType SubobjType) {
Result = APValue(Value);
return true;
}
bool found(APFloat &Value, QualType SubobjType) {
Result = APValue(Value);
return true;
}
bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) {
Result = APValue(extractStringLiteralCharacter(
Info, Subobj.getLValueBase().get<const Expr *>(), Character));
return true;
}
};
} // end anonymous namespace
const AccessKinds ExtractSubobjectHandler::AccessKind;
/// Extract the designated sub-object of an rvalue.
static bool extractSubobject(EvalInfo &Info, const Expr *E,
const CompleteObject &Obj,
const SubobjectDesignator &Sub,
APValue &Result) {
ExtractSubobjectHandler Handler = { Info, Result };
return findSubobject(Info, E, Obj, Sub, Handler);
}
namespace {
struct ModifySubobjectHandler {
EvalInfo &Info;
APValue &NewVal;
const Expr *E;
typedef bool result_type;
static const AccessKinds AccessKind = AK_Assign;
bool checkConst(QualType QT) {
// Assigning to a const object has undefined behavior.
if (QT.isConstQualified()) {
Info.Diag(E, diag::note_constexpr_modify_const_type) << QT;
return false;
}
return true;
}
bool failed() { return false; }
bool found(APValue &Subobj, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
// We've been given ownership of NewVal, so just swap it in.
Subobj.swap(NewVal);
return true;
}
bool found(APSInt &Value, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
if (!NewVal.isInt()) {
// Maybe trying to write a cast pointer value into a complex?
Info.Diag(E);
return false;
}
Value = NewVal.getInt();
return true;
}
bool found(APFloat &Value, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
Value = NewVal.getFloat();
return true;
}
bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) {
llvm_unreachable("shouldn't encounter string elements with ExpandArrays");
}
};
} // end anonymous namespace
const AccessKinds ModifySubobjectHandler::AccessKind;
/// Update the designated sub-object of an rvalue to the given value.
static bool modifySubobject(EvalInfo &Info, const Expr *E,
const CompleteObject &Obj,
const SubobjectDesignator &Sub,
APValue &NewVal) {
ModifySubobjectHandler Handler = { Info, NewVal, E };
return findSubobject(Info, E, Obj, Sub, Handler);
}
/// Find the position where two subobject designators diverge, or equivalently
/// the length of the common initial subsequence.
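/// For example, the designators for &arr[2].x and &arr[5].x diverge at entry
/// 0 with WasArrayIndex set, while those for &a.x and &a.y diverge at a
/// member entry.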
static unsigned FindDesignatorMismatch(QualType ObjType,
const SubobjectDesignator &A,
const SubobjectDesignator &B,
bool &WasArrayIndex) {
unsigned I = 0, N = std::min(A.Entries.size(), B.Entries.size());
for (/**/; I != N; ++I) {
if (!ObjType.isNull() &&
(ObjType->isArrayType() || ObjType->isAnyComplexType())) {
// Next subobject is an array element.
if (A.Entries[I].ArrayIndex != B.Entries[I].ArrayIndex) {
WasArrayIndex = true;
return I;
}
if (ObjType->isAnyComplexType())
ObjType = ObjType->castAs<ComplexType>()->getElementType();
else
ObjType = ObjType->castAsArrayTypeUnsafe()->getElementType();
} else {
if (A.Entries[I].BaseOrMember != B.Entries[I].BaseOrMember) {
WasArrayIndex = false;
return I;
}
if (const FieldDecl *FD = getAsField(A.Entries[I]))
// Next subobject is a field.
ObjType = FD->getType();
else
// Next subobject is a base class.
ObjType = QualType();
}
}
WasArrayIndex = false;
return I;
}
/// Determine whether the given subobject designators refer to elements of the
/// same array object.
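/// E.g. &a[1] and &a[4] designate elements of the same array, but &a[1].f and
/// &a[2].f do not: each designates a subobject of an element, so the entire
/// path, including the index, would have to match.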
static bool AreElementsOfSameArray(QualType ObjType,
const SubobjectDesignator &A,
const SubobjectDesignator &B) {
if (A.Entries.size() != B.Entries.size())
return false;
bool IsArray = A.MostDerivedArraySize != 0;
if (IsArray && A.MostDerivedPathLength != A.Entries.size())
// A is a subobject of the array element.
return false;
// If A (and B) designates an array element, the last entry will be the array
// index. That doesn't have to match. Otherwise, we're in the 'implicit array
// of length 1' case, and the entire path must match.
bool WasArrayIndex;
unsigned CommonLength = FindDesignatorMismatch(ObjType, A, B, WasArrayIndex);
return CommonLength >= A.Entries.size() - IsArray;
}
/// Find the complete object to which an LValue refers.
CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, AccessKinds AK,
const LValue &LVal, QualType LValType) {
if (!LVal.Base) {
Info.Diag(E, diag::note_constexpr_access_null) << AK;
return CompleteObject();
}
CallStackFrame *Frame = 0;
if (LVal.CallIndex) {
Frame = Info.getCallFrame(LVal.CallIndex);
if (!Frame) {
Info.Diag(E, diag::note_constexpr_lifetime_ended, 1)
<< AK << LVal.Base.is<const ValueDecl*>();
NoteLValueLocation(Info, LVal.Base);
return CompleteObject();
}
}
// C++11 DR1311: An lvalue-to-rvalue conversion on a volatile-qualified type
// is not a constant expression (even if the object is non-volatile). We also
// apply this rule to C++98, in order to conform to the expected 'volatile'
// semantics.
if (LValType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus)
Info.Diag(E, diag::note_constexpr_access_volatile_type)
<< AK << LValType;
else
Info.Diag(E);
return CompleteObject();
}
// Compute value storage location and type of base object.
APValue *BaseVal = 0;
QualType BaseType = getType(LVal.Base);
if (const ValueDecl *D = LVal.Base.dyn_cast<const ValueDecl*>()) {
// In C++98, const, non-volatile integers initialized with ICEs are ICEs.
// In C++11, constexpr, non-volatile variables initialized with constant
// expressions are constant expressions too. Inside constexpr functions,
// parameters are constant expressions even if they're non-const.
// In C++1y, objects local to a constant expression (those with a Frame) are
// both readable and writable inside constant expressions.
// In C, such things can also be folded, although they are not ICEs.
const VarDecl *VD = dyn_cast<VarDecl>(D);
if (VD) {
if (const VarDecl *VDef = VD->getDefinition(Info.Ctx))
VD = VDef;
}
if (!VD || VD->isInvalidDecl()) {
Info.Diag(E);
return CompleteObject();
}
// Accesses of volatile-qualified objects are not allowed.
if (BaseType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus) {
Info.Diag(E, diag::note_constexpr_access_volatile_obj, 1)
<< AK << 1 << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
Info.Diag(E);
}
return CompleteObject();
}
// Unless we're looking at a local variable or argument in a constexpr call,
// the variable we're reading must be const.
if (!Frame) {
if (Info.getLangOpts().CPlusPlus1y &&
VD == Info.EvaluatingDecl.dyn_cast<const ValueDecl *>()) {
// OK, we can read and modify an object if we're in the process of
// evaluating its initializer, because its lifetime began in this
// evaluation.
} else if (AK != AK_Read) {
// All the remaining cases only permit reading.
Info.Diag(E, diag::note_constexpr_modify_global);
return CompleteObject();
} else if (VD->isConstexpr()) {
// OK, we can read this variable.
} else if (BaseType->isIntegralOrEnumerationType()) {
if (!BaseType.isConstQualified()) {
if (Info.getLangOpts().CPlusPlus) {
Info.Diag(E, diag::note_constexpr_ltor_non_const_int, 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
Info.Diag(E);
}
return CompleteObject();
}
} else if (BaseType->isFloatingType() && BaseType.isConstQualified()) {
// We support folding of const floating-point types, in order to make
// static const data members of such types (supported as an extension)
// more useful.
if (Info.getLangOpts().CPlusPlus11) {
Info.CCEDiag(E, diag::note_constexpr_ltor_non_constexpr, 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
Info.CCEDiag(E);
}
} else {
// FIXME: Allow folding of values of any literal type in all languages.
if (Info.getLangOpts().CPlusPlus11) {
Info.Diag(E, diag::note_constexpr_ltor_non_constexpr, 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
Info.Diag(E);
}
return CompleteObject();
}
}
if (!evaluateVarDeclInit(Info, E, VD, Frame, BaseVal))
return CompleteObject();
} else {
const Expr *Base = LVal.Base.dyn_cast<const Expr*>();
if (!Frame) {
if (const MaterializeTemporaryExpr *MTE =
dyn_cast<MaterializeTemporaryExpr>(Base)) {
assert(MTE->getStorageDuration() == SD_Static &&
"should have a frame for a non-global materialized temporary");
// Per C++1y [expr.const]p2:
// an lvalue-to-rvalue conversion [is not allowed unless it applies to]
// - a [...] glvalue of integral or enumeration type that refers to
// a non-volatile const object [...]
// [...]
// - a [...] glvalue of literal type that refers to a non-volatile
// object whose lifetime began within the evaluation of e.
//
// C++11 misses the 'began within the evaluation of e' check and
// instead allows all temporaries, including things like:
// int &&r = 1;
// int x = ++r;
// constexpr int k = r;
// Therefore we use the C++1y rules in C++11 too.
const ValueDecl *VD = Info.EvaluatingDecl.dyn_cast<const ValueDecl*>();
const ValueDecl *ED = MTE->getExtendingDecl();
if (!(BaseType.isConstQualified() &&
BaseType->isIntegralOrEnumerationType()) &&
!(VD && VD->getCanonicalDecl() == ED->getCanonicalDecl())) {
Info.Diag(E, diag::note_constexpr_access_static_temporary, 1) << AK;
Info.Note(MTE->getExprLoc(), diag::note_constexpr_temporary_here);
return CompleteObject();
}
BaseVal = Info.Ctx.getMaterializedTemporaryValue(MTE, false);
assert(BaseVal && "got reference to unevaluated temporary");
} else {
Info.Diag(E);
return CompleteObject();
}
} else {
BaseVal = Frame->getTemporary(Base);
assert(BaseVal && "missing value for temporary");
}
// Volatile temporary objects cannot be accessed in constant expressions.
if (BaseType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus) {
Info.Diag(E, diag::note_constexpr_access_volatile_obj, 1)
<< AK << 0;
Info.Note(Base->getExprLoc(), diag::note_constexpr_temporary_here);
} else {
Info.Diag(E);
}
return CompleteObject();
}
}
// During the construction of an object, it is not yet 'const'.
// FIXME: We don't set up EvaluatingDecl for local variables or temporaries,
// and this doesn't do quite the right thing for const subobjects of the
// object under construction.
if (LVal.getLValueBase() == Info.EvaluatingDecl) {
BaseType = Info.Ctx.getCanonicalType(BaseType);
BaseType.removeLocalConst();
}
// In C++1y, we can't safely access any mutable state when we might be
// evaluating after an unmodeled side effect or an evaluation failure.
//
// FIXME: Not all local state is mutable. Allow local constant subobjects
// to be read here (but take care with 'mutable' fields).
if (Frame && Info.getLangOpts().CPlusPlus1y &&
(Info.EvalStatus.HasSideEffects || Info.keepEvaluatingAfterFailure()))
return CompleteObject();
return CompleteObject(BaseVal, BaseType);
}
/// \brief Perform an lvalue-to-rvalue conversion on the given glvalue. This
/// can also be used for 'lvalue-to-lvalue' conversions for looking up the
/// glvalue referred to by an entity of reference type.
///
/// \param Info - Information about the ongoing evaluation.
/// \param Conv - The expression for which we are performing the conversion.
/// Used for diagnostics.
/// \param Type - The type of the glvalue (before stripping cv-qualifiers in the
/// case of a non-class type).
/// \param LVal - The glvalue on which we are attempting to perform this action.
/// \param RVal - The produced value will be placed here.
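///
/// For example, reading 'n' in the initializer of 'm' below performs an
/// lvalue-to-rvalue conversion on the glvalue designating 'n':
///   constexpr int n = 5;
///   constexpr int m = n;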
static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv,
QualType Type,
const LValue &LVal, APValue &RVal) {
if (LVal.Designator.Invalid)
return false;
// Check for special cases where there is no existing APValue to look at.
const Expr *Base = LVal.Base.dyn_cast<const Expr*>();
if (!LVal.Designator.Invalid && Base && !LVal.CallIndex &&
!Type.isVolatileQualified()) {
if (const CompoundLiteralExpr *CLE = dyn_cast<CompoundLiteralExpr>(Base)) {
// In C99, a CompoundLiteralExpr is an lvalue, and we defer evaluating the
// initializer until now for such expressions. Such an expression can't be
// an ICE in C, so this only matters for constant folding.
assert(!Info.getLangOpts().CPlusPlus && "lvalue compound literal in c++?");
if (Type.isVolatileQualified()) {
Info.Diag(Conv);
return false;
}
APValue Lit;
if (!Evaluate(Lit, Info, CLE->getInitializer()))
return false;
CompleteObject LitObj(&Lit, Base->getType());
return extractSubobject(Info, Conv, LitObj, LVal.Designator, RVal);
} else if (isa<StringLiteral>(Base)) {
// We represent a string literal array as an lvalue pointing at the
// corresponding expression, rather than building an array of chars.
// FIXME: Support PredefinedExpr, ObjCEncodeExpr, MakeStringConstant
APValue Str(Base, CharUnits::Zero(), APValue::NoLValuePath(), 0);
CompleteObject StrObj(&Str, Base->getType());
return extractSubobject(Info, Conv, StrObj, LVal.Designator, RVal);
}
}
CompleteObject Obj = findCompleteObject(Info, Conv, AK_Read, LVal, Type);
return Obj && extractSubobject(Info, Conv, Obj, LVal.Designator, RVal);
}
/// Perform an assignment of Val to LVal. Takes ownership of Val.
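/// For example, in C++1y this evaluates the assignment 'a = 7' in:
///   constexpr int f() { int a = 0; a = 7; return a; } // f() == 7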
static bool handleAssignment(EvalInfo &Info, const Expr *E, const LValue &LVal,
QualType LValType, APValue &Val) {
if (LVal.Designator.Invalid)
return false;
if (!Info.getLangOpts().CPlusPlus1y) {
Info.Diag(E);
return false;
}
CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType);
return Obj && modifySubobject(Info, E, Obj, LVal.Designator, Val);
}
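// An increment or decrement can only overflow when the operand's type is at
// least as wide as 'int'; narrower signed operands (e.g. 'short') promote to
// 'int' before the arithmetic, so they cannot overflow here. For example:
//   int i = INT_MAX; ++i;    // signed overflow, diagnosed below
//   short s = SHRT_MAX; ++s; // promotes to int; no signed overflow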
static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) {
return T->isSignedIntegerType() &&
Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy);
}
namespace {
struct CompoundAssignSubobjectHandler {
EvalInfo &Info;
const Expr *E;
QualType PromotedLHSType;
BinaryOperatorKind Opcode;
const APValue &RHS;
static const AccessKinds AccessKind = AK_Assign;
typedef bool result_type;
bool checkConst(QualType QT) {
// Assigning to a const object has undefined behavior.
if (QT.isConstQualified()) {
Info.Diag(E, diag::note_constexpr_modify_const_type) << QT;
return false;
}
return true;
}
bool failed() { return false; }
bool found(APValue &Subobj, QualType SubobjType) {
switch (Subobj.getKind()) {
case APValue::Int:
return found(Subobj.getInt(), SubobjType);
case APValue::Float:
return found(Subobj.getFloat(), SubobjType);
case APValue::ComplexInt:
case APValue::ComplexFloat:
// FIXME: Implement complex compound assignment.
Info.Diag(E);
return false;
case APValue::LValue:
return foundPointer(Subobj, SubobjType);
default:
// FIXME: can this happen?
Info.Diag(E);
return false;
}
}
bool found(APSInt &Value, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
if (!SubobjType->isIntegerType() || !RHS.isInt()) {
// We don't support compound assignment on integer-cast-to-pointer
// values.
Info.Diag(E);
return false;
}
APSInt LHS = HandleIntToIntCast(Info, E, PromotedLHSType,
SubobjType, Value);
if (!handleIntIntBinOp(Info, E, LHS, Opcode, RHS.getInt(), LHS))
return false;
Value = HandleIntToIntCast(Info, E, SubobjType, PromotedLHSType, LHS);
return true;
}
bool found(APFloat &Value, QualType SubobjType) {
return checkConst(SubobjType) &&
HandleFloatToFloatCast(Info, E, SubobjType, PromotedLHSType,
Value) &&
handleFloatFloatBinOp(Info, E, Value, Opcode, RHS.getFloat()) &&
HandleFloatToFloatCast(Info, E, PromotedLHSType, SubobjType, Value);
}
bool foundPointer(APValue &Subobj, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
QualType PointeeType;
if (const PointerType *PT = SubobjType->getAs<PointerType>())
PointeeType = PT->getPointeeType();
if (PointeeType.isNull() || !RHS.isInt() ||
(Opcode != BO_Add && Opcode != BO_Sub)) {
Info.Diag(E);
return false;
}
int64_t Offset = getExtValue(RHS.getInt());
if (Opcode == BO_Sub)
Offset = -Offset;
LValue LVal;
LVal.setFrom(Info.Ctx, Subobj);
if (!HandleLValueArrayAdjustment(Info, E, LVal, PointeeType, Offset))
return false;
LVal.moveInto(Subobj);
return true;
}
bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) {
llvm_unreachable("shouldn't encounter string elements here");
}
};
} // end anonymous namespace
const AccessKinds CompoundAssignSubobjectHandler::AccessKind;
/// Perform a compound assignment of LVal <op>= RVal.
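/// For example, in C++1y this evaluates 'x += 4' in:
///   constexpr int f() { int x = 1; x += 4; return x; } // f() == 5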
static bool handleCompoundAssignment(
EvalInfo &Info, const Expr *E,
const LValue &LVal, QualType LValType, QualType PromotedLValType,
BinaryOperatorKind Opcode, const APValue &RVal) {
if (LVal.Designator.Invalid)
return false;
if (!Info.getLangOpts().CPlusPlus1y) {
Info.Diag(E);
return false;
}
CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType);
CompoundAssignSubobjectHandler Handler = { Info, E, PromotedLValType, Opcode,
RVal };
return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler);
}
namespace {
struct IncDecSubobjectHandler {
EvalInfo &Info;
const Expr *E;
AccessKinds AccessKind;
APValue *Old;
typedef bool result_type;
bool checkConst(QualType QT) {
// Assigning to a const object has undefined behavior.
if (QT.isConstQualified()) {
Info.Diag(E, diag::note_constexpr_modify_const_type) << QT;
return false;
}
return true;
}
bool failed() { return false; }
bool found(APValue &Subobj, QualType SubobjType) {
// Stash the old value. Also clear Old, so we don't clobber it later
// if we're post-incrementing a complex.
if (Old) {
*Old = Subobj;
Old = 0;
}
switch (Subobj.getKind()) {
case APValue::Int:
return found(Subobj.getInt(), SubobjType);
case APValue::Float:
return found(Subobj.getFloat(), SubobjType);
case APValue::ComplexInt:
return found(Subobj.getComplexIntReal(),
SubobjType->castAs<ComplexType>()->getElementType()
.withCVRQualifiers(SubobjType.getCVRQualifiers()));
case APValue::ComplexFloat:
return found(Subobj.getComplexFloatReal(),
SubobjType->castAs<ComplexType>()->getElementType()
.withCVRQualifiers(SubobjType.getCVRQualifiers()));
case APValue::LValue:
return foundPointer(Subobj, SubobjType);
default:
// FIXME: can this happen?
Info.Diag(E);
return false;
}
}
bool found(APSInt &Value, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
if (!SubobjType->isIntegerType()) {
// We don't support increment / decrement on integer-cast-to-pointer
// values.
Info.Diag(E);
return false;
}
if (Old) *Old = APValue(Value);
// bool arithmetic promotes to int, and the conversion back to bool
// doesn't reduce mod 2^n, so special-case it.
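// For example, 'bool b = true; ++b;' leaves b true: the promoted value 2
// converts back to bool as true rather than wrapping around to false.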
if (SubobjType->isBooleanType()) {
if (AccessKind == AK_Increment)
Value = 1;
else
Value = !Value;
return true;
}
bool WasNegative = Value.isNegative();
if (AccessKind == AK_Increment) {
++Value;
if (!WasNegative && Value.isNegative() &&
isOverflowingIntegerType(Info.Ctx, SubobjType)) {
APSInt ActualValue(Value, /*IsUnsigned*/true);
HandleOverflow(Info, E, ActualValue, SubobjType);
}
} else {
--Value;
if (WasNegative && !Value.isNegative() &&
isOverflowingIntegerType(Info.Ctx, SubobjType)) {
unsigned BitWidth = Value.getBitWidth();
APSInt ActualValue(Value.sext(BitWidth + 1), /*IsUnsigned*/false);
ActualValue.setBit(BitWidth);
HandleOverflow(Info, E, ActualValue, SubobjType);
}
}
return true;
}
bool found(APFloat &Value, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
if (Old) *Old = APValue(Value);
APFloat One(Value.getSemantics(), 1);
if (AccessKind == AK_Increment)
Value.add(One, APFloat::rmNearestTiesToEven);
else
Value.subtract(One, APFloat::rmNearestTiesToEven);
return true;
}
bool foundPointer(APValue &Subobj, QualType SubobjType) {
if (!checkConst(SubobjType))
return false;
QualType PointeeType;
if (const PointerType *PT = SubobjType->getAs<PointerType>())
PointeeType = PT->getPointeeType();
else {
Info.Diag(E);
return false;
}
LValue LVal;
LVal.setFrom(Info.Ctx, Subobj);
if (!HandleLValueArrayAdjustment(Info, E, LVal, PointeeType,
AccessKind == AK_Increment ? 1 : -1))
return false;
LVal.moveInto(Subobj);
return true;
}
bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) {
llvm_unreachable("shouldn't encounter string elements here");
}
};
} // end anonymous namespace
/// Perform an increment or decrement on LVal.
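/// For example, in C++1y this evaluates '++n' and 'n--' in:
///   constexpr int f() { int n = 3; ++n; return n--; } // f() == 4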
static bool handleIncDec(EvalInfo &Info, const Expr *E, const LValue &LVal,
QualType LValType, bool IsIncrement, APValue *Old) {
if (LVal.Designator.Invalid)
return false;
if (!Info.getLangOpts().CPlusPlus1y) {
Info.Diag(E);
return false;
}
AccessKinds AK = IsIncrement ? AK_Increment : AK_Decrement;
CompleteObject Obj = findCompleteObject(Info, E, AK, LVal, LValType);
IncDecSubobjectHandler Handler = { Info, E, AK, Old };
return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler);
}
/// Build an lvalue for the object argument of a member function call.
static bool EvaluateObjectArgument(EvalInfo &Info, const Expr *Object,
LValue &This) {
if (Object->getType()->isPointerType())
return EvaluatePointer(Object, This, Info);
if (Object->isGLValue())
return EvaluateLValue(Object, This, Info);
if (Object->getType()->isLiteralType(Info.Ctx))
return EvaluateTemporary(Object, This, Info);
return false;
}
/// HandleMemberPointerAccess - Evaluate a member access operation and build an
/// lvalue referring to the result.
///
/// \param Info - Information about the ongoing evaluation.
/// \param LV - An lvalue referring to the base of the member pointer.
/// \param RHS - The member pointer expression.
/// \param IncludeMember - Specifies whether the member itself is included in
/// the resulting LValue subobject designator. This is not possible when
/// creating a bound member function.
/// \return The field or method declaration to which the member pointer refers,
/// or 0 if evaluation fails.
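///
/// For example, this evaluates 's.*pm' in:
///   struct S { int a; };
///   constexpr int S::*pm = &S::a;
///   constexpr S s = { 42 };
///   static_assert(s.*pm == 42, "");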
static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
QualType LVType,
LValue &LV,
const Expr *RHS,
bool IncludeMember = true) {
MemberPtr MemPtr;
if (!EvaluateMemberPointer(RHS, MemPtr, Info))
return 0;
// C++11 [expr.mptr.oper]p6: If the second operand is the null pointer to
// member value, the behavior is undefined.
if (!MemPtr.getDecl()) {
// FIXME: Specific diagnostic.
Info.Diag(RHS);
return 0;
}
if (MemPtr.isDerivedMember()) {
// This is a member of some derived class. Truncate LV appropriately.
// The end of the derived-to-base path for the base object must match the
// derived-to-base path for the member pointer.
if (LV.Designator.MostDerivedPathLength + MemPtr.Path.size() >
LV.Designator.Entries.size()) {
Info.Diag(RHS);
return 0;
}
unsigned PathLengthToMember =
LV.Designator.Entries.size() - MemPtr.Path.size();
for (unsigned I = 0, N = MemPtr.Path.size(); I != N; ++I) {
const CXXRecordDecl *LVDecl = getAsBaseClass(
LV.Designator.Entries[PathLengthToMember + I]);
const CXXRecordDecl *MPDecl = MemPtr.Path[I];
if (LVDecl->getCanonicalDecl() != MPDecl->getCanonicalDecl()) {
Info.Diag(RHS);
return 0;
}
}
// Truncate the lvalue to the appropriate derived class.
if (!CastToDerivedClass(Info, RHS, LV, MemPtr.getContainingRecord(),
PathLengthToMember))
return 0;
} else if (!MemPtr.Path.empty()) {
// Extend the LValue path with the member pointer's path.
LV.Designator.Entries.reserve(LV.Designator.Entries.size() +
MemPtr.Path.size() + IncludeMember);
// Walk down to the appropriate base class.
if (const PointerType *PT = LVType->getAs<PointerType>())
LVType = PT->getPointeeType();
const CXXRecordDecl *RD = LVType->getAsCXXRecordDecl();
assert(RD && "member pointer access on non-class-type expression");
// The first class in the path is that of the lvalue.
for (unsigned I = 1, N = MemPtr.Path.size(); I != N; ++I) {
const CXXRecordDecl *Base = MemPtr.Path[N - I - 1];
if (!HandleLValueDirectBase(Info, RHS, LV, RD, Base))
return 0;
RD = Base;
}
// Finally cast to the class containing the member.
if (!HandleLValueDirectBase(Info, RHS, LV, RD,
MemPtr.getContainingRecord()))
return 0;
}
// Add the member. Note that we cannot build bound member functions here.
if (IncludeMember) {
if (const FieldDecl *FD = dyn_cast<FieldDecl>(MemPtr.getDecl())) {
if (!HandleLValueMember(Info, RHS, LV, FD))
return 0;
} else if (const IndirectFieldDecl *IFD =
dyn_cast<IndirectFieldDecl>(MemPtr.getDecl())) {
if (!HandleLValueIndirectMember(Info, RHS, LV, IFD))
return 0;
} else {
llvm_unreachable("can't construct reference to bound member function");
}
}
return MemPtr.getDecl();
}
static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
const BinaryOperator *BO,
LValue &LV,
bool IncludeMember = true) {
assert(BO->getOpcode() == BO_PtrMemD || BO->getOpcode() == BO_PtrMemI);
if (!EvaluateObjectArgument(Info, BO->getLHS(), LV)) {
if (Info.keepEvaluatingAfterFailure()) {
MemberPtr MemPtr;
EvaluateMemberPointer(BO->getRHS(), MemPtr, Info);
}
return 0;
}
return HandleMemberPointerAccess(Info, BO->getLHS()->getType(), LV,
BO->getRHS(), IncludeMember);
}
/// HandleBaseToDerivedCast - Apply the given base-to-derived cast operation on
/// the provided lvalue, which currently refers to the base object.
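/// For example, the static_cast below lands within the derived-to-base path
/// recorded for 'b', so the lvalue can be truncated back to the D object:
///   struct B {}; struct D : B {};
///   constexpr D d {};
///   constexpr const B &b = d;
///   constexpr const D &d2 = static_cast<const D&>(b);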
static bool HandleBaseToDerivedCast(EvalInfo &Info, const CastExpr *E,
LValue &Result) {
SubobjectDesignator &D = Result.Designator;
if (D.Invalid || !Result.checkNullPointer(Info, E, CSK_Derived))
return false;
QualType TargetQT = E->getType();
if (const PointerType *PT = TargetQT->getAs<PointerType>())
TargetQT = PT->getPointeeType();
// Check this cast lands within the final derived-to-base subobject path.
if (D.MostDerivedPathLength + E->path_size() > D.Entries.size()) {
Info.CCEDiag(E, diag::note_constexpr_invalid_downcast)
<< D.MostDerivedType << TargetQT;
return false;
}
// Check the type of the final cast. We don't need to check the path,
// since a cast can only be formed if the path is unique.
unsigned NewEntriesSize = D.Entries.size() - E->path_size();
const CXXRecordDecl *TargetType = TargetQT->getAsCXXRecordDecl();
const CXXRecordDecl *FinalType;
if (NewEntriesSize == D.MostDerivedPathLength)
FinalType = D.MostDerivedType->getAsCXXRecordDecl();
else
FinalType = getAsBaseClass(D.Entries[NewEntriesSize - 1]);
if (FinalType->getCanonicalDecl() != TargetType->getCanonicalDecl()) {
Info.CCEDiag(E, diag::note_constexpr_invalid_downcast)
<< D.MostDerivedType << TargetQT;
return false;
}
// Truncate the lvalue to the appropriate derived class.
return CastToDerivedClass(Info, E, Result, TargetType, NewEntriesSize);
}
namespace {
enum EvalStmtResult {
/// Evaluation failed.
ESR_Failed,
/// Hit a 'return' statement.
ESR_Returned,
/// Evaluation succeeded.
ESR_Succeeded,
/// Hit a 'continue' statement.
ESR_Continue,
/// Hit a 'break' statement.
ESR_Break,
/// Still scanning for 'case' or 'default' statement.
ESR_CaseNotFound
};
}
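/// Evaluate a declaration: for a local variable, create a value slot for it
/// in the current call frame and evaluate its initializer.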
static bool EvaluateDecl(EvalInfo &Info, const Decl *D) {
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
// We don't need to evaluate the initializer for a static local.
if (!VD->hasLocalStorage())
return true;
LValue Result;
Result.set(VD, Info.CurrentCall->Index);
APValue &Val = Info.CurrentCall->createTemporary(VD, true);
if (!VD->getInit()) {
Info.Diag(D->getLocStart(), diag::note_constexpr_uninitialized)
<< false << VD->getType();
Val = APValue();
return false;
}
if (!EvaluateInPlace(Val, Info, Result, VD->getInit())) {
// Wipe out any partially-computed value, to allow tracking that this
// evaluation failed.
Val = APValue();
return false;
}
}
return true;
}
/// Evaluate a condition (either a variable declaration or an expression).
static bool EvaluateCond(EvalInfo &Info, const VarDecl *CondDecl,
const Expr *Cond, bool &Result) {
FullExpressionRAII Scope(Info);
if (CondDecl && !EvaluateDecl(Info, CondDecl))
return false;
return EvaluateAsBooleanCondition(Cond, Result, Info);
}
static EvalStmtResult EvaluateStmt(APValue &Result, EvalInfo &Info,
const Stmt *S, const SwitchCase *SC = 0);
/// Evaluate the body of a loop, and translate the result as appropriate.
static EvalStmtResult EvaluateLoopBody(APValue &Result, EvalInfo &Info,
const Stmt *Body,
const SwitchCase *Case = 0) {
BlockScopeRAII Scope(Info);
switch (EvalStmtResult ESR = EvaluateStmt(Result, Info, Body, Case)) {
case ESR_Break:
return ESR_Succeeded;
case ESR_Succeeded:
case ESR_Continue:
return ESR_Continue;
case ESR_Failed:
case ESR_Returned:
case ESR_CaseNotFound:
return ESR;
}
llvm_unreachable("Invalid EvalStmtResult!");
}
/// Evaluate a switch statement.
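/// For example, in C++1y this selects and evaluates the matching case in:
///   constexpr int f(int n) {
///     switch (n) { case 0: return 10; default: return 20; }
///   }
///   static_assert(f(0) == 10 && f(1) == 20, "");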
static EvalStmtResult EvaluateSwitch(APValue &Result, EvalInfo &Info,
const SwitchStmt *SS) {
BlockScopeRAII Scope(Info);
// Evaluate the switch condition.
APSInt Value;
{
FullExpressionRAII Scope(Info);
if (SS->getConditionVariable() &&
!EvaluateDecl(Info, SS->getConditionVariable()))
return ESR_Failed;
if (!EvaluateInteger(SS->getCond(), Value, Info))
return ESR_Failed;
}
// Find the switch case corresponding to the value of the condition.
// FIXME: Cache this lookup.
const SwitchCase *Found = 0;
for (const SwitchCase *SC = SS->getSwitchCaseList(); SC;
SC = SC->getNextSwitchCase()) {
if (isa<DefaultStmt>(SC)) {
Found = SC;
continue;
}
const CaseStmt *CS = cast<CaseStmt>(SC);
APSInt LHS = CS->getLHS()->EvaluateKnownConstInt(Info.Ctx);
APSInt RHS = CS->getRHS() ? CS->getRHS()->EvaluateKnownConstInt(Info.Ctx)
: LHS;
if (LHS <= Value && Value <= RHS) {
Found = SC;
break;
}
}
if (!Found)
return ESR_Succeeded;
// Search the switch body for the switch case and evaluate it from there.
switch (EvalStmtResult ESR = EvaluateStmt(Result, Info, SS->getBody(), Found)) {
case ESR_Break:
return ESR_Succeeded;
case ESR_Succeeded:
case ESR_Continue:
case ESR_Failed:
case ESR_Returned:
return ESR;
case ESR_CaseNotFound:
// This can only happen if the switch case is nested within a statement
// expression. We have no intention of supporting that.
Info.Diag(Found->getLocStart(), diag::note_constexpr_stmt_expr_unsupported);
return ESR_Failed;
}
llvm_unreachable("Invalid EvalStmtResult!");
}
/// Evaluate a statement.
static EvalStmtResult EvaluateStmt(APValue &Result, EvalInfo &Info,
const Stmt *S, const SwitchCase *Case) {
if (!Info.nextStep(S))
return ESR_Failed;
// If we're hunting down a 'case' or 'default' label, recurse through
// substatements until we hit the label.
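// Such labels may appear inside nested sub-statements, as in Duff's device:
//   switch (n) { case 0: do { ++m; case 1: ++m; } while (--n > 0); }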
if (Case) {
// FIXME: We don't start the lifetime of objects whose initialization we
// jump over. However, such objects must be of class type with a trivial
// default constructor that initializes all subobjects, so it must be empty,
// so this almost never matters.
switch (S->getStmtClass()) {
case Stmt::CompoundStmtClass:
// FIXME: Precompute which substatement of a compound statement we
// would jump to, and go straight there rather than performing a
// linear scan each time.
case Stmt::LabelStmtClass:
case Stmt::AttributedStmtClass:
case Stmt::DoStmtClass:
break;
case Stmt::CaseStmtClass:
case Stmt::DefaultStmtClass:
if (Case == S)
Case = 0;
break;
case Stmt::IfStmtClass: {
// FIXME: Precompute which side of an 'if' we would jump to, and go
// straight there rather than scanning both sides.
const IfStmt *IS = cast<IfStmt>(S);
// Wrap the evaluation in a block scope, in case it's a DeclStmt
// preceded by our switch label.
BlockScopeRAII Scope(Info);
EvalStmtResult ESR = EvaluateStmt(Result, Info, IS->getThen(), Case);
if (ESR != ESR_CaseNotFound || !IS->getElse())
return ESR;
return EvaluateStmt(Result, Info, IS->getElse(), Case);
}
case Stmt::WhileStmtClass: {
EvalStmtResult ESR =
EvaluateLoopBody(Result, Info, cast<WhileStmt>(S)->getBody(), Case);
if (ESR != ESR_Continue)
return ESR;
break;
}
case Stmt::ForStmtClass: {
const ForStmt *FS = cast<ForStmt>(S);
EvalStmtResult ESR =
EvaluateLoopBody(Result, Info, FS->getBody(), Case);
if (ESR != ESR_Continue)
return ESR;
if (FS->getInc()) {
FullExpressionRAII IncScope(Info);
if (!EvaluateIgnoredValue(Info, FS->getInc()))
return ESR_Failed;
}
break;
}
case Stmt::DeclStmtClass:
// FIXME: If the variable has initialization that can't be jumped over,
// bail out of any immediately-surrounding compound-statement too.
default:
return ESR_CaseNotFound;
}
}
switch (S->getStmtClass()) {
default:
if (const Expr *E = dyn_cast<Expr>(S)) {
// Don't bother evaluating beyond an expression-statement which couldn't
// be evaluated.
FullExpressionRAII Scope(Info);
if (!EvaluateIgnoredValue(Info, E))
return ESR_Failed;
return ESR_Succeeded;
}
Info.Diag(S->getLocStart());
return ESR_Failed;
case Stmt::NullStmtClass:
return ESR_Succeeded;
case Stmt::DeclStmtClass: {
const DeclStmt *DS = cast<DeclStmt>(S);
for (DeclStmt::const_decl_iterator DclIt = DS->decl_begin(),
DclEnd = DS->decl_end(); DclIt != DclEnd; ++DclIt) {
// Each declaration initialization is its own full-expression.
// FIXME: This isn't quite right; if we're performing aggregate
// initialization, each braced subexpression is its own full-expression.
FullExpressionRAII Scope(Info);
if (!EvaluateDecl(Info, *DclIt) && !Info.keepEvaluatingAfterFailure())
return ESR_Failed;
}
return ESR_Succeeded;
}
case Stmt::ReturnStmtClass: {
const Expr *RetExpr = cast<ReturnStmt>(S)->getRetValue();
FullExpressionRAII Scope(Info);
if (RetExpr && !Evaluate(Result, Info, RetExpr))
return ESR_Failed;
return ESR_Returned;
}
case Stmt::CompoundStmtClass: {
BlockScopeRAII Scope(Info);
const CompoundStmt *CS = cast<CompoundStmt>(S);
for (CompoundStmt::const_body_iterator BI = CS->body_begin(),
BE = CS->body_end(); BI != BE; ++BI) {
EvalStmtResult ESR = EvaluateStmt(Result, Info, *BI, Case);
if (ESR == ESR_Succeeded)
Case = 0;
else if (ESR != ESR_CaseNotFound)
return ESR;
}
return Case ? ESR_CaseNotFound : ESR_Succeeded;
}
case Stmt::IfStmtClass: {
const IfStmt *IS = cast<IfStmt>(S);
// Evaluate the condition, as either a var decl or as an expression.
BlockScopeRAII Scope(Info);
bool Cond;
if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(), Cond))
return ESR_Failed;
if (const Stmt *SubStmt = Cond ? IS->getThen() : IS->getElse()) {
EvalStmtResult ESR = EvaluateStmt(Result, Info, SubStmt);
if (ESR != ESR_Succeeded)
return ESR;
}
return ESR_Succeeded;
}
case Stmt::WhileStmtClass: {
const WhileStmt *WS = cast<WhileStmt>(S);
while (true) {
BlockScopeRAII Scope(Info);
bool Continue;
if (!EvaluateCond(Info, WS->getConditionVariable(), WS->getCond(),
Continue))
return ESR_Failed;
if (!Continue)
break;
EvalStmtResult ESR = EvaluateLoopBody(Result, Info, WS->getBody());
if (ESR != ESR_Continue)
return ESR;
}
return ESR_Succeeded;
}
case Stmt::DoStmtClass: {
const DoStmt *DS = cast<DoStmt>(S);
bool Continue;
do {
EvalStmtResult ESR = EvaluateLoopBody(Result, Info, DS->getBody(), Case);
if (ESR != ESR_Continue)
return ESR;
Case = 0;
FullExpressionRAII CondScope(Info);
if (!EvaluateAsBooleanCondition(DS->getCond(), Continue, Info))
return ESR_Failed;
} while (Continue);
return ESR_Succeeded;
}
case Stmt::ForStmtClass: {
const ForStmt *FS = cast<ForStmt>(S);
BlockScopeRAII Scope(Info);
if (FS->getInit()) {
EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getInit());
if (ESR != ESR_Succeeded)
return ESR;
}
while (true) {
BlockScopeRAII Scope(Info);
bool Continue = true;
if (FS->getCond() && !EvaluateCond(Info, FS->getConditionVariable(),
FS->getCond(), Continue))
return ESR_Failed;
if (!Continue)
break;
EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody());
if (ESR != ESR_Continue)
return ESR;
if (FS->getInc()) {
FullExpressionRAII IncScope(Info);
if (!EvaluateIgnoredValue(Info, FS->getInc()))
return ESR_Failed;
}
}
return ESR_Succeeded;
}
case Stmt::CXXForRangeStmtClass: {
const CXXForRangeStmt *FS = cast<CXXForRangeStmt>(S);
BlockScopeRAII Scope(Info);
// Initialize the __range variable.
EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getRangeStmt());
if (ESR != ESR_Succeeded)
return ESR;
// Create the __begin and __end iterators.
ESR = EvaluateStmt(Result, Info, FS->getBeginEndStmt());
if (ESR != ESR_Succeeded)
return ESR;
while (true) {
// Condition: __begin != __end.
{
bool Continue = true;
FullExpressionRAII CondExpr(Info);
if (!EvaluateAsBooleanCondition(FS->getCond(), Continue, Info))
return ESR_Failed;
if (!Continue)
break;
}
// User's variable declaration, initialized by *__begin.
BlockScopeRAII InnerScope(Info);
ESR = EvaluateStmt(Result, Info, FS->getLoopVarStmt());
if (ESR != ESR_Succeeded)
return ESR;
// Loop body.
ESR = EvaluateLoopBody(Result, Info, FS->getBody());
if (ESR != ESR_Continue)
return ESR;
// Increment: ++__begin
if (!EvaluateIgnoredValue(Info, FS->getInc()))
return ESR_Failed;
}
return ESR_Succeeded;
}
case Stmt::SwitchStmtClass:
return EvaluateSwitch(Result, Info, cast<SwitchStmt>(S));
case Stmt::ContinueStmtClass:
return ESR_Continue;
case Stmt::BreakStmtClass:
return ESR_Break;
case Stmt::LabelStmtClass:
return EvaluateStmt(Result, Info, cast<LabelStmt>(S)->getSubStmt(), Case);
case Stmt::AttributedStmtClass:
// As a general principle, C++11 attributes can be ignored without
// any semantic impact.
return EvaluateStmt(Result, Info, cast<AttributedStmt>(S)->getSubStmt(),
Case);
case Stmt::CaseStmtClass:
case Stmt::DefaultStmtClass:
return EvaluateStmt(Result, Info, cast<SwitchCase>(S)->getSubStmt(), Case);
}
}
/// CheckTrivialDefaultConstructor - Check whether a constructor is a trivial
/// default constructor. If so, we'll fold it whether or not it's marked as
/// constexpr. If it is marked as constexpr, we will never implicitly define it,
/// so we need special handling.
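/// For example, value-initialization, as in 'S()' for a trivial class S,
/// zero-initializes the object without actually invoking the constructor, so
/// it can be folded even if S's default constructor is not constexpr.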
static bool CheckTrivialDefaultConstructor(EvalInfo &Info, SourceLocation Loc,
const CXXConstructorDecl *CD,
bool IsValueInitialization) {
if (!CD->isTrivial() || !CD->isDefaultConstructor())
return false;
// Value-initialization does not call a trivial default constructor, so such a
// call is a core constant expression whether or not the constructor is
// constexpr.
if (!CD->isConstexpr() && !IsValueInitialization) {
if (Info.getLangOpts().CPlusPlus11) {
// FIXME: If DiagDecl is an implicitly-declared special member function,
// we should be much more explicit about why it's not constexpr.
Info.CCEDiag(Loc, diag::note_constexpr_invalid_function, 1)
<< /*IsConstexpr*/0 << /*IsConstructor*/1 << CD;
Info.Note(CD->getLocation(), diag::note_declared_at);
} else {
Info.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr);
}
}
return true;
}
/// CheckConstexprFunction - Check that a function can be called in a constant
/// expression.
static bool CheckConstexprFunction(EvalInfo &Info, SourceLocation CallLoc,
const FunctionDecl *Declaration,
const FunctionDecl *Definition) {
// Potential constant expressions can contain calls to declared, but not yet
// defined, constexpr functions.
if (Info.checkingPotentialConstantExpression() && !Definition &&
Declaration->isConstexpr())
return false;
// Bail out with no diagnostic if the function declaration itself is invalid.
// We will have produced a relevant diagnostic while parsing it.
if (Declaration->isInvalidDecl())
return false;
// Can we evaluate this function call?
if (Definition && Definition->isConstexpr() && !Definition->isInvalidDecl())
return true;
if (Info.getLangOpts().CPlusPlus11) {
const FunctionDecl *DiagDecl = Definition ? Definition : Declaration;
// FIXME: If DiagDecl is an implicitly-declared special member function, we
// should be much more explicit about why it's not constexpr.
Info.Diag(CallLoc, diag::note_constexpr_invalid_function, 1)
<< DiagDecl->isConstexpr() << isa<CXXConstructorDecl>(DiagDecl)
<< DiagDecl;
Info.Note(DiagDecl->getLocation(), diag::note_declared_at);
} else {
Info.Diag(CallLoc, diag::note_invalid_subexpr_in_const_expr);
}
return false;
}
namespace {
typedef SmallVector<APValue, 8> ArgVector;
}
/// EvaluateArgs - Evaluate the arguments to a function call.
static bool EvaluateArgs(ArrayRef<const Expr*> Args, ArgVector &ArgValues,
EvalInfo &Info) {
bool Success = true;
for (ArrayRef<const Expr*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
if (!Evaluate(ArgValues[I - Args.begin()], Info, *I)) {
// If we're checking for a potential constant expression, evaluate all
// arguments even if some of them fail.
if (!Info.keepEvaluatingAfterFailure())
return false;
Success = false;
}
}
return Success;
}
/// Evaluate a function call.
static bool HandleFunctionCall(SourceLocation CallLoc,
const FunctionDecl *Callee, const LValue *This,
ArrayRef<const Expr*> Args, const Stmt *Body,
EvalInfo &Info, APValue &Result) {
ArgVector ArgValues(Args.size());
if (!EvaluateArgs(Args, ArgValues, Info))
return false;
if (!Info.CheckCallLimit(CallLoc))
return false;
CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues.data());
// For a trivial copy or move assignment, perform an APValue copy. This is
// essential for unions, where the operations performed by the assignment
// operator cannot be represented as statements.
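// For example, 'u2 = u1' below must copy whichever union member is active,
// which no sequence of member assignments could express:
//   union U { int a; float f; };
//   constexpr U f() { U u1 = { 42 }; U u2 = { 0 }; u2 = u1; return u2; }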
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Callee);
if (MD && MD->isDefaulted() && MD->isTrivial()) {
assert(This &&
(MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()));
LValue RHS;
RHS.setFrom(Info.Ctx, ArgValues[0]);
APValue RHSValue;
if (!handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(),
RHS, RHSValue))
return false;
if (!handleAssignment(Info, Args[0], *This, MD->getThisType(Info.Ctx),
RHSValue))
return false;
This->moveInto(Result);
return true;
}
EvalStmtResult ESR = EvaluateStmt(Result, Info, Body);
if (ESR == ESR_Succeeded) {
if (Callee->getResultType()->isVoidType())
return true;
Info.Diag(Callee->getLocEnd(), diag::note_constexpr_no_return);
}
return ESR == ESR_Returned;
}
/// Evaluate a constructor call.
static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This,
ArrayRef<const Expr*> Args,
const CXXConstructorDecl *Definition,
EvalInfo &Info, APValue &Result) {
ArgVector ArgValues(Args.size());
if (!EvaluateArgs(Args, ArgValues, Info))
return false;
if (!Info.CheckCallLimit(CallLoc))
return false;
const CXXRecordDecl *RD = Definition->getParent();
if (RD->getNumVBases()) {
Info.Diag(CallLoc, diag::note_constexpr_virtual_base) << RD;
return false;
}
CallStackFrame Frame(Info, CallLoc, Definition, &This, ArgValues.data());
// If it's a delegating constructor, just delegate.
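// For example, S() below simply forwards to S(int):
//   struct S { int n; constexpr S() : S(42) {} constexpr S(int i) : n(i) {} };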
if (Definition->isDelegatingConstructor()) {
CXXConstructorDecl::init_const_iterator I = Definition->init_begin();
{
FullExpressionRAII InitScope(Info);
if (!EvaluateInPlace(Result, Info, This, (*I)->getInit()))
return false;
}
return EvaluateStmt(Result, Info, Definition->getBody()) != ESR_Failed;
}
// For a trivial copy or move constructor, perform an APValue copy. This is
// essential for unions, where the operations performed by the constructor
// cannot be represented by ctor-initializers.
if (Definition->isDefaulted() &&
((Definition->isCopyConstructor() && Definition->isTrivial()) ||
(Definition->isMoveConstructor() && Definition->isTrivial()))) {
LValue RHS;
RHS.setFrom(Info.Ctx, ArgValues[0]);
return handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(),
RHS, Result);
}
// Reserve space for the struct members.
if (!RD->isUnion() && Result.isUninit())
Result = APValue(APValue::UninitStruct(), RD->getNumBases(),
std::distance(RD->field_begin(), RD->field_end()));
if (RD->isInvalidDecl()) return false;
const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD);
// A scope for temporaries lifetime-extended by reference members.
BlockScopeRAII LifetimeExtendedScope(Info);
bool Success = true;
unsigned BasesSeen = 0;
#ifndef NDEBUG
CXXRecordDecl::base_class_const_iterator BaseIt = RD->bases_begin();
#endif
for (CXXConstructorDecl::init_const_iterator I = Definition->init_begin(),
E = Definition->init_end(); I != E; ++I) {
LValue Subobject = This;
APValue *Value = &Result;
// Determine the subobject to initialize.
FieldDecl *FD = 0;
if ((*I)->isBaseInitializer()) {
QualType BaseType((*I)->getBaseClass(), 0);
#ifndef NDEBUG
// Non-virtual base classes are initialized in the order in which they appear
// in the class definition. We have already checked for virtual base classes.
assert(!BaseIt->isVirtual() && "virtual base for literal type");
assert(Info.Ctx.hasSameType(BaseIt->getType(), BaseType) &&
"base class initializers not in expected order");
++BaseIt;
#endif
if (!HandleLValueDirectBase(Info, (*I)->getInit(), Subobject, RD,
BaseType->getAsCXXRecordDecl(), &Layout))
return false;
Value = &Result.getStructBase(BasesSeen++);
} else if ((FD = (*I)->getMember())) {
if (!HandleLValueMember(Info, (*I)->getInit(), Subobject, FD, &Layout))
return false;
if (RD->isUnion()) {
Result = APValue(FD);
Value = &Result.getUnionValue();
} else {
Value = &Result.getStructField(FD->getFieldIndex());
}
} else if (IndirectFieldDecl *IFD = (*I)->getIndirectMember()) {
// Walk the indirect field decl's chain to find the object to initialize,
// and make sure we've initialized every step along it.
for (IndirectFieldDecl::chain_iterator C = IFD->chain_begin(),
CE = IFD->chain_end();
C != CE; ++C) {
FD = cast<FieldDecl>(*C);
CXXRecordDecl *CD = cast<CXXRecordDecl>(FD->getParent());
// Switch the union field if it differs. This happens if we had
// preceding zero-initialization, and we're now initializing a union
// subobject other than the first.
// FIXME: In this case, the values of the other subobjects are
// specified, since zero-initialization sets all padding bits to zero.
if (Value->isUninit() ||
(Value->isUnion() && Value->getUnionField() != FD)) {
if (CD->isUnion())
*Value = APValue(FD);
else
*Value = APValue(APValue::UninitStruct(), CD->getNumBases(),
std::distance(CD->field_begin(), CD->field_end()));
}
if (!HandleLValueMember(Info, (*I)->getInit(), Subobject, FD))
return false;
if (CD->isUnion())
Value = &Value->getUnionValue();
else
Value = &Value->getStructField(FD->getFieldIndex());
}
} else {
llvm_unreachable("unknown base initializer kind");
}
FullExpressionRAII InitScope(Info);
if (!EvaluateInPlace(*Value, Info, Subobject, (*I)->getInit()) ||
(FD && FD->isBitField() && !truncateBitfieldValue(Info, (*I)->getInit(),
*Value, FD))) {
// If we're checking for a potential constant expression, evaluate all
// initializers even if some of them fail.
if (!Info.keepEvaluatingAfterFailure())
return false;
Success = false;
}
}
return Success &&
EvaluateStmt(Result, Info, Definition->getBody()) != ESR_Failed;
}
//===----------------------------------------------------------------------===//
// Generic Evaluation
//===----------------------------------------------------------------------===//
namespace {
// FIXME: RetTy is always bool. Remove it.
template <class Derived, typename RetTy=bool>
class ExprEvaluatorBase
: public ConstStmtVisitor<Derived, RetTy> {
private:
RetTy DerivedSuccess(const APValue &V, const Expr *E) {
return static_cast<Derived*>(this)->Success(V, E);
}
RetTy DerivedZeroInitialization(const Expr *E) {
return static_cast<Derived*>(this)->ZeroInitialization(E);
}
// Check whether a conditional operator with a non-constant condition is a
// potential constant expression. If neither arm is a potential constant
// expression, then the conditional operator is not either.
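// For example, 'p ? 1 : rand()' is potentially constant because its true arm
// is, while 'p ? rand() : rand()' never is (for a non-constant 'p').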
template<typename ConditionalOperator>
void CheckPotentialConstantConditional(const ConditionalOperator *E) {
assert(Info.checkingPotentialConstantExpression());
// Speculatively evaluate both arms.
{
SmallVector<PartialDiagnosticAt, 8> Diag;
SpeculativeEvaluationRAII Speculate(Info, &Diag);
StmtVisitorTy::Visit(E->getFalseExpr());
if (Diag.empty())
return;
Diag.clear();
StmtVisitorTy::Visit(E->getTrueExpr());
if (Diag.empty())
return;
}
Error(E, diag::note_constexpr_conditional_never_const);
}
template<typename ConditionalOperator>
bool HandleConditionalOperator(const ConditionalOperator *E) {
bool BoolResult;
if (!EvaluateAsBooleanCondition(E->getCond(), BoolResult, Info)) {
if (Info.checkingPotentialConstantExpression())
CheckPotentialConstantConditional(E);
return false;
}
Expr *EvalExpr = BoolResult ? E->getTrueExpr() : E->getFalseExpr();
return StmtVisitorTy::Visit(EvalExpr);
}
protected:
EvalInfo &Info;
typedef ConstStmtVisitor<Derived, RetTy> StmtVisitorTy;
typedef ExprEvaluatorBase ExprEvaluatorBaseTy;
OptionalDiagnostic CCEDiag(const Expr *E, diag::kind D) {
return Info.CCEDiag(E, D);
}
RetTy ZeroInitialization(const Expr *E) { return Error(E); }
public:
ExprEvaluatorBase(EvalInfo &Info) : Info(Info) {}
EvalInfo &getEvalInfo() { return Info; }
/// Report an evaluation error. This should only be called when an error is
/// first discovered. When propagating an error, just return false.
bool Error(const Expr *E, diag::kind D) {
Info.Diag(E, D);
return false;
}
bool Error(const Expr *E) {
return Error(E, diag::note_invalid_subexpr_in_const_expr);
}
RetTy VisitStmt(const Stmt *) {
llvm_unreachable("Expression evaluator should not be called on stmts");
}
RetTy VisitExpr(const Expr *E) {
return Error(E);
}
RetTy VisitParenExpr(const ParenExpr *E)
{ return StmtVisitorTy::Visit(E->getSubExpr()); }
RetTy VisitUnaryExtension(const UnaryOperator *E)
{ return StmtVisitorTy::Visit(E->getSubExpr()); }
RetTy VisitUnaryPlus(const UnaryOperator *E)
{ return StmtVisitorTy::Visit(E->getSubExpr()); }
RetTy VisitChooseExpr(const ChooseExpr *E)
{ return StmtVisitorTy::Visit(E->getChosenSubExpr()); }
RetTy VisitGenericSelectionExpr(const GenericSelectionExpr *E)
{ return StmtVisitorTy::Visit(E->getResultExpr()); }
RetTy VisitSubstNonTypeTemplateParmExpr(const SubstNonTypeTemplateParmExpr *E)
{ return StmtVisitorTy::Visit(E->getReplacement()); }
RetTy VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *E)
{ return StmtVisitorTy::Visit(E->getExpr()); }
RetTy VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *E) {
// The initializer may not have been parsed yet, or might be erroneous.
if (!E->getExpr())
return Error(E);
return StmtVisitorTy::Visit(E->getExpr());
}
// We cannot create any objects for which cleanups are required, so there is
// nothing to do here; all cleanups must come from unevaluated subexpressions.
RetTy VisitExprWithCleanups(const ExprWithCleanups *E)
{ return StmtVisitorTy::Visit(E->getSubExpr()); }
RetTy VisitCXXReinterpretCastExpr(const CXXReinterpretCastExpr *E) {
CCEDiag(E, diag::note_constexpr_invalid_cast) << 0;
return static_cast<Derived*>(this)->VisitCastExpr(E);
}
RetTy VisitCXXDynamicCastExpr(const CXXDynamicCastExpr *E) {
CCEDiag(E, diag::note_constexpr_invalid_cast) << 1;
return static_cast<Derived*>(this)->VisitCastExpr(E);
}
RetTy VisitBinaryOperator(const BinaryOperator *E) {
switch (E->getOpcode()) {
default:
return Error(E);
case BO_Comma:
VisitIgnoredValue(E->getLHS());
return StmtVisitorTy::Visit(E->getRHS());
case BO_PtrMemD:
case BO_PtrMemI: {
LValue Obj;
if (!HandleMemberPointerAccess(Info, E, Obj))
return false;
APValue Result;
if (!handleLValueToRValueConversion(Info, E, E->getType(), Obj, Result))
return false;
return DerivedSuccess(Result, E);
}
}
}
RetTy VisitBinaryConditionalOperator(const BinaryConditionalOperator *E) {
// Evaluate and cache the common expression. We treat it as a temporary,
// even though it's not quite the same thing.
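// For example, in the GNU extension 'x ?: y', 'x' is evaluated once and the
// cached value serves as both the condition and the true arm.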
if (!Evaluate(Info.CurrentCall->createTemporary(E->getOpaqueValue(), false),
Info, E->getCommon()))
return false;
return HandleConditionalOperator(E);
}
RetTy VisitConditionalOperator(const ConditionalOperator *E) {
bool IsBcpCall = false;
// If the condition (ignoring parens) is a __builtin_constant_p call,
// the result is a constant expression if it can be folded without
// side-effects. This is an important GNU extension. See GCC PR38377
// for discussion.
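// For example, '__builtin_constant_p(x) ? x : 0' folds to 0 whenever 'x'
// cannot be proven constant, so the conditional folds either way.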
if (const CallExpr *CallCE =
dyn_cast<CallExpr>(E->getCond()->IgnoreParenCasts()))
if (CallCE->isBuiltinCall() == Builtin::BI__builtin_constant_p)
IsBcpCall = true;
// Always assume __builtin_constant_p(...) ? ... : ... is a potential
// constant expression; we can't check whether it's potentially foldable.
if (Info.checkingPotentialConstantExpression() && IsBcpCall)
return false;
FoldConstant Fold(Info, IsBcpCall);
if (!HandleConditionalOperator(E)) {
Fold.keepDiagnostics();
return false;
}
return true;
}
RetTy VisitOpaqueValueExpr(const OpaqueValueExpr *E) {
if (APValue *Value = Info.CurrentCall->getTemporary(E))
return DerivedSuccess(*Value, E);
const Expr *Source = E->getSourceExpr();
if (!Source)
return Error(E);
if (Source == E) { // sanity checking.
assert(0 && "OpaqueValueExpr recursively refers to itself");
return Error(E);
}
return StmtVisitorTy::Visit(Source);
}
RetTy VisitCallExpr(const CallExpr *E) {
const Expr *Callee = E->getCallee()->IgnoreParens();
QualType CalleeType = Callee->getType();
const FunctionDecl *FD = 0;
LValue *This = 0;
LValue ThisVal;
ArrayRef<const Expr *> Args(E->getArgs(), E->getNumArgs());
bool HasQualifier = false;
// Extract function decl and 'this' pointer from the callee.
if (CalleeType->isSpecificBuiltinType(BuiltinType::BoundMember)) {
const ValueDecl *Member = 0;
if (const MemberExpr *ME = dyn_cast<MemberExpr>(Callee)) {
// Explicit bound member calls, such as x.f() or p->g().
if (!EvaluateObjectArgument(Info, ME->getBase(), ThisVal))
return false;
Member = ME->getMemberDecl();
This = &ThisVal;
HasQualifier = ME->hasQualifier();
} else if (const BinaryOperator *BE = dyn_cast<BinaryOperator>(Callee)) {
// Indirect bound member calls ('.*' or '->*').
Member = HandleMemberPointerAccess(Info, BE, ThisVal, false);
if (!Member) return false;
This = &ThisVal;
} else
return Error(Callee);
FD = dyn_cast<FunctionDecl>(Member);
if (!FD)
return Error(Callee);
} else if (CalleeType->isFunctionPointerType()) {
LValue Call;
if (!EvaluatePointer(Callee, Call, Info))
return false;
if (!Call.getLValueOffset().isZero())
return Error(Callee);
FD = dyn_cast_or_null<FunctionDecl>(
Call.getLValueBase().dyn_cast<const ValueDecl*>());
if (!FD)
return Error(Callee);
// Overloaded operator calls to member functions are represented as normal
// calls with '*this' as the first argument.
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
if (MD && !MD->isStatic()) {
// FIXME: When selecting an implicit conversion for an overloaded
// operator delete, we sometimes try to evaluate calls to conversion
// operators without a 'this' parameter!
if (Args.empty())
return Error(E);
if (!EvaluateObjectArgument(Info, Args[0], ThisVal))
return false;
This = &ThisVal;
Args = Args.slice(1);
}
// Don't call function pointers which have been cast to some other type.
if (!Info.Ctx.hasSameType(CalleeType->getPointeeType(), FD->getType()))
return Error(E);
} else
return Error(E);
if (This && !This->checkSubobject(Info, E, CSK_This))
return false;
// DR1358 allows virtual constexpr functions in some cases. Don't allow
// calls to such functions in constant expressions.
if (This && !HasQualifier &&
isa<CXXMethodDecl>(FD) && cast<CXXMethodDecl>(FD)->isVirtual())
return Error(E, diag::note_constexpr_virtual_call);
const FunctionDecl *Definition = 0;
Stmt *Body = FD->getBody(Definition);
APValue Result;
if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition) ||
!HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body,
Info, Result))
return false;
return DerivedSuccess(Result, E);
}
RetTy VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
return StmtVisitorTy::Visit(E->getInitializer());
}
RetTy VisitInitListExpr(const InitListExpr *E) {
if (E->getNumInits() == 0)
return DerivedZeroInitialization(E);
if (E->getNumInits() == 1)
return StmtVisitorTy::Visit(E->getInit(0));
return Error(E);
}
RetTy VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) {
return DerivedZeroInitialization(E);
}
RetTy VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E) {
return DerivedZeroInitialization(E);
}
RetTy VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *E) {
return DerivedZeroInitialization(E);
}
/// A member expression where the object is a prvalue is itself a prvalue.
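/// For example, 'f().x', where 'f' returns a struct by value: the member is
/// extracted directly from the evaluated prvalue.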
RetTy VisitMemberExpr(const MemberExpr *E) {
assert(!E->isArrow() && "missing call to bound member function?");
APValue Val;
if (!Evaluate(Val, Info, E->getBase()))
return false;
QualType BaseTy = E->getBase()->getType();
const FieldDecl *FD = dyn_cast<FieldDecl>(E->getMemberDecl());
if (!FD) return Error(E);
assert(!FD->getType()->isReferenceType() && "prvalue reference?");
assert(BaseTy->castAs<RecordType>()->getDecl()->getCanonicalDecl() ==
FD->getParent()->getCanonicalDecl() && "record / field mismatch");
CompleteObject Obj(&Val, BaseTy);
SubobjectDesignator Designator(BaseTy);
Designator.addDeclUnchecked(FD);
APValue Result;
return extractSubobject(Info, E, Obj, Designator, Result) &&
DerivedSuccess(Result, E);
}
RetTy VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
break;
case CK_AtomicToNonAtomic: {
APValue AtomicVal;
if (!EvaluateAtomic(E->getSubExpr(), AtomicVal, Info))
return false;
return DerivedSuccess(AtomicVal, E);
}
case CK_NoOp:
case CK_UserDefinedConversion:
return StmtVisitorTy::Visit(E->getSubExpr());
case CK_LValueToRValue: {
LValue LVal;
if (!EvaluateLValue(E->getSubExpr(), LVal, Info))
return false;
APValue RVal;
// Note, we use the subexpression's type in order to retain cv-qualifiers.
if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(),
LVal, RVal))
return false;
return DerivedSuccess(RVal, E);
}
}
return Error(E);
}
RetTy VisitUnaryPostInc(const UnaryOperator *UO) {
return VisitUnaryPostIncDec(UO);
}
RetTy VisitUnaryPostDec(const UnaryOperator *UO) {
return VisitUnaryPostIncDec(UO);
}
RetTy VisitUnaryPostIncDec(const UnaryOperator *UO) {
if (!Info.getLangOpts().CPlusPlus1y && !Info.keepEvaluatingAfterFailure())
return Error(UO);
LValue LVal;
if (!EvaluateLValue(UO->getSubExpr(), LVal, Info))
return false;
APValue RVal;
if (!handleIncDec(this->Info, UO, LVal, UO->getSubExpr()->getType(),
UO->isIncrementOp(), &RVal))
return false;
return DerivedSuccess(RVal, UO);
}
RetTy VisitStmtExpr(const StmtExpr *E) {
// We will have checked the full-expressions inside the statement expression
// when they were completed, and don't need to check them again now.
if (Info.checkingForOverflow())
return Error(E);
BlockScopeRAII Scope(Info);
const CompoundStmt *CS = E->getSubStmt();
for (CompoundStmt::const_body_iterator BI = CS->body_begin(),
BE = CS->body_end();
/**/; ++BI) {
if (BI + 1 == BE) {
const Expr *FinalExpr = dyn_cast<Expr>(*BI);
if (!FinalExpr) {
Info.Diag((*BI)->getLocStart(),
diag::note_constexpr_stmt_expr_unsupported);
return false;
}
return this->Visit(FinalExpr);
}
APValue ReturnValue;
EvalStmtResult ESR = EvaluateStmt(ReturnValue, Info, *BI);
if (ESR != ESR_Succeeded) {
// FIXME: If the statement-expression terminated due to 'return',
// 'break', or 'continue', it would be nice to propagate that to
// the outer statement evaluation rather than bailing out.
if (ESR != ESR_Failed)
Info.Diag((*BI)->getLocStart(),
diag::note_constexpr_stmt_expr_unsupported);
return false;
}
}
}
/// Visit an expression which is evaluated, but whose value is ignored.
void VisitIgnoredValue(const Expr *E) {
EvaluateIgnoredValue(Info, E);
}
};
}
//===----------------------------------------------------------------------===//
// Common base class for lvalue and temporary evaluation.
//===----------------------------------------------------------------------===//
namespace {
template<class Derived>
class LValueExprEvaluatorBase
: public ExprEvaluatorBase<Derived, bool> {
protected:
LValue &Result;
typedef LValueExprEvaluatorBase LValueExprEvaluatorBaseTy;
typedef ExprEvaluatorBase<Derived, bool> ExprEvaluatorBaseTy;
bool Success(APValue::LValueBase B) {
Result.set(B);
return true;
}
public:
LValueExprEvaluatorBase(EvalInfo &Info, LValue &Result) :
ExprEvaluatorBaseTy(Info), Result(Result) {}
bool Success(const APValue &V, const Expr *E) {
Result.setFrom(this->Info.Ctx, V);
return true;
}
bool VisitMemberExpr(const MemberExpr *E) {
// Handle non-static data members.
QualType BaseTy;
if (E->isArrow()) {
if (!EvaluatePointer(E->getBase(), Result, this->Info))
return false;
BaseTy = E->getBase()->getType()->castAs<PointerType>()->getPointeeType();
} else if (E->getBase()->isRValue()) {
assert(E->getBase()->getType()->isRecordType());
if (!EvaluateTemporary(E->getBase(), Result, this->Info))
return false;
BaseTy = E->getBase()->getType();
} else {
if (!this->Visit(E->getBase()))
return false;
BaseTy = E->getBase()->getType();
}
const ValueDecl *MD = E->getMemberDecl();
if (const FieldDecl *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) {
assert(BaseTy->getAs<RecordType>()->getDecl()->getCanonicalDecl() ==
FD->getParent()->getCanonicalDecl() && "record / field mismatch");
(void)BaseTy;
if (!HandleLValueMember(this->Info, E, Result, FD))
return false;
} else if (const IndirectFieldDecl *IFD = dyn_cast<IndirectFieldDecl>(MD)) {
if (!HandleLValueIndirectMember(this->Info, E, Result, IFD))
return false;
} else
return this->Error(E);
if (MD->getType()->isReferenceType()) {
APValue RefValue;
if (!handleLValueToRValueConversion(this->Info, E, MD->getType(), Result,
RefValue))
return false;
return Success(RefValue, E);
}
return true;
}
bool VisitBinaryOperator(const BinaryOperator *E) {
switch (E->getOpcode()) {
default:
return ExprEvaluatorBaseTy::VisitBinaryOperator(E);
case BO_PtrMemD:
case BO_PtrMemI:
return HandleMemberPointerAccess(this->Info, E, Result);
}
}
bool VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase:
if (!this->Visit(E->getSubExpr()))
return false;
// Now figure out the necessary offset to add to the base LV to get from
// the derived class to the base class.
return HandleLValueBasePath(this->Info, E, E->getSubExpr()->getType(),
Result);
}
}
};
}
//===----------------------------------------------------------------------===//
// LValue Evaluation
//
// This is used for evaluating lvalues (in C and C++), xvalues (in C++11),
// function designators (in C), decl references to void objects (in C), and
// temporaries (if building with -Wno-address-of-temporary).
//
// LValue evaluation produces values comprising a base expression of one of the
// following types:
// - Declarations
// * VarDecl
// * FunctionDecl
// - Literals
// * CompoundLiteralExpr in C
// * StringLiteral
// * CXXTypeidExpr
// * PredefinedExpr
// * ObjCStringLiteralExpr
// * ObjCEncodeExpr
// * AddrLabelExpr
// * BlockExpr
// * CallExpr for a MakeStringConstant builtin
// - Locals and temporaries
// * MaterializeTemporaryExpr
// * Any Expr, with a CallIndex indicating the function in which the temporary
// was evaluated, for cases where the MaterializeTemporaryExpr is missing
// from the AST (FIXME).
// * A MaterializeTemporaryExpr that has static storage duration, with no
// CallIndex, for a lifetime-extended temporary.
// plus an offset in bytes.
//===----------------------------------------------------------------------===//
namespace {
class LValueExprEvaluator
: public LValueExprEvaluatorBase<LValueExprEvaluator> {
public:
LValueExprEvaluator(EvalInfo &Info, LValue &Result) :
LValueExprEvaluatorBaseTy(Info, Result) {}
bool VisitVarDecl(const Expr *E, const VarDecl *VD);
bool VisitUnaryPreIncDec(const UnaryOperator *UO);
bool VisitDeclRefExpr(const DeclRefExpr *E);
bool VisitPredefinedExpr(const PredefinedExpr *E) { return Success(E); }
bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
bool VisitMemberExpr(const MemberExpr *E);
bool VisitStringLiteral(const StringLiteral *E) { return Success(E); }
bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); }
bool VisitCXXTypeidExpr(const CXXTypeidExpr *E);
bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
bool VisitArraySubscriptExpr(const ArraySubscriptExpr *E);
bool VisitUnaryDeref(const UnaryOperator *E);
bool VisitUnaryReal(const UnaryOperator *E);
bool VisitUnaryImag(const UnaryOperator *E);
bool VisitUnaryPreInc(const UnaryOperator *UO) {
return VisitUnaryPreIncDec(UO);
}
bool VisitUnaryPreDec(const UnaryOperator *UO) {
return VisitUnaryPreIncDec(UO);
}
bool VisitBinAssign(const BinaryOperator *BO);
bool VisitCompoundAssignOperator(const CompoundAssignOperator *CAO);
bool VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return LValueExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_LValueBitCast:
this->CCEDiag(E, diag::note_constexpr_invalid_cast) << 2;
if (!Visit(E->getSubExpr()))
return false;
Result.Designator.setInvalid();
return true;
case CK_BaseToDerived:
if (!Visit(E->getSubExpr()))
return false;
return HandleBaseToDerivedCast(Info, E, Result);
}
}
};
} // end anonymous namespace
/// Evaluate an expression as an lvalue. This can be legitimately called on
/// expressions which are not glvalues, in two cases:
/// * function designators in C, and
/// * "extern void" objects
static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info) {
assert(E->isGLValue() || E->getType()->isFunctionType() ||
E->getType()->isVoidType());
return LValueExprEvaluator(Info, Result).Visit(E);
}
bool LValueExprEvaluator::VisitDeclRefExpr(const DeclRefExpr *E) {
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(E->getDecl()))
return Success(FD);
if (const VarDecl *VD = dyn_cast<VarDecl>(E->getDecl()))
return VisitVarDecl(E, VD);
return Error(E);
}
bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) {
CallStackFrame *Frame = 0;
if (VD->hasLocalStorage() && Info.CurrentCall->Index > 1)
Frame = Info.CurrentCall;
if (!VD->getType()->isReferenceType()) {
if (Frame) {
Result.set(VD, Frame->Index);
return true;
}
return Success(VD);
}
APValue *V;
if (!evaluateVarDeclInit(Info, E, VD, Frame, V))
return false;
if (V->isUninit()) {
if (!Info.checkingPotentialConstantExpression())
Info.Diag(E, diag::note_constexpr_use_uninit_reference);
return false;
}
return Success(*V, E);
}
bool LValueExprEvaluator::VisitMaterializeTemporaryExpr(
const MaterializeTemporaryExpr *E) {
// Walk through the expression to find the materialized temporary itself.
SmallVector<const Expr *, 2> CommaLHSs;
SmallVector<SubobjectAdjustment, 2> Adjustments;
const Expr *Inner = E->GetTemporaryExpr()->
skipRValueSubobjectAdjustments(CommaLHSs, Adjustments);
// If we passed any comma operators, evaluate their LHSs.
for (unsigned I = 0, N = CommaLHSs.size(); I != N; ++I)
if (!EvaluateIgnoredValue(Info, CommaLHSs[I]))
return false;
// A materialized temporary with static storage duration can appear within the
// result of a constant expression evaluation, so we need to preserve its
// value for use outside this evaluation.
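// For example, 'constexpr const int &r = 42;' lifetime-extends the temporary
// holding 42 to static storage duration, so its value must remain available
// after this evaluation completes.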
APValue *Value;
if (E->getStorageDuration() == SD_Static) {
Value = Info.Ctx.getMaterializedTemporaryValue(E, true);
*Value = APValue();
Result.set(E);
} else {
Value = &Info.CurrentCall->
createTemporary(E, E->getStorageDuration() == SD_Automatic);
Result.set(E, Info.CurrentCall->Index);
}
QualType Type = Inner->getType();
// Materialize the temporary itself.
if (!EvaluateInPlace(*Value, Info, Result, Inner) ||
(E->getStorageDuration() == SD_Static &&
!CheckConstantExpression(Info, E->getExprLoc(), Type, *Value))) {
*Value = APValue();
return false;
}
// Adjust our lvalue to refer to the desired subobject.
for (unsigned I = Adjustments.size(); I != 0; /**/) {
--I;
switch (Adjustments[I].Kind) {
case SubobjectAdjustment::DerivedToBaseAdjustment:
if (!HandleLValueBasePath(Info, Adjustments[I].DerivedToBase.BasePath,
Type, Result))
return false;
Type = Adjustments[I].DerivedToBase.BasePath->getType();
break;
case SubobjectAdjustment::FieldAdjustment:
if (!HandleLValueMember(Info, E, Result, Adjustments[I].Field))
return false;
Type = Adjustments[I].Field->getType();
break;
case SubobjectAdjustment::MemberPointerAdjustment:
if (!HandleMemberPointerAccess(this->Info, Type, Result,
Adjustments[I].Ptr.RHS))
return false;
Type = Adjustments[I].Ptr.MPT->getPointeeType();
break;
}
}
return true;
}
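// Illustrative sketch (editor's comment, not part of the original source):
// lifetime-extended temporaries take the SD_Static branch above, e.g.:
//
//   struct P { int a, b; };
//   constexpr const P &p = P{1, 2};  // static storage duration, value kept
//   static_assert(p.b == 2, "");
//   constexpr int n = P{3, 4}.b;     // FieldAdjustment on the temporary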
bool
LValueExprEvaluator::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
assert(!Info.getLangOpts().CPlusPlus && "lvalue compound literal in c++?");
// Defer visiting the literal until the lvalue-to-rvalue conversion. We can
// only see this when folding in C, so there's no standard to follow here.
return Success(E);
}
bool LValueExprEvaluator::VisitCXXTypeidExpr(const CXXTypeidExpr *E) {
if (!E->isPotentiallyEvaluated())
return Success(E);
Info.Diag(E, diag::note_constexpr_typeid_polymorphic)
<< E->getExprOperand()->getType()
<< E->getExprOperand()->getSourceRange();
return false;
}
bool LValueExprEvaluator::VisitCXXUuidofExpr(const CXXUuidofExpr *E) {
return Success(E);
}
bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) {
// Handle static data members.
if (const VarDecl *VD = dyn_cast<VarDecl>(E->getMemberDecl())) {
VisitIgnoredValue(E->getBase());
return VisitVarDecl(E, VD);
}
// Handle static member functions.
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(E->getMemberDecl())) {
if (MD->isStatic()) {
VisitIgnoredValue(E->getBase());
return Success(MD);
}
}
// Handle non-static data members.
return LValueExprEvaluatorBaseTy::VisitMemberExpr(E);
}
bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
// FIXME: Deal with vectors as array subscript bases.
if (E->getBase()->getType()->isVectorType())
return Error(E);
if (!EvaluatePointer(E->getBase(), Result, Info))
return false;
APSInt Index;
if (!EvaluateInteger(E->getIdx(), Index, Info))
return false;
return HandleLValueArrayAdjustment(Info, E, Result, E->getType(),
getExtValue(Index));
}
bool LValueExprEvaluator::VisitUnaryDeref(const UnaryOperator *E) {
return EvaluatePointer(E->getSubExpr(), Result, Info);
}
bool LValueExprEvaluator::VisitUnaryReal(const UnaryOperator *E) {
if (!Visit(E->getSubExpr()))
return false;
// __real is a no-op on scalar lvalues.
if (E->getSubExpr()->getType()->isAnyComplexType())
HandleLValueComplexElement(Info, E, Result, E->getType(), false);
return true;
}
bool LValueExprEvaluator::VisitUnaryImag(const UnaryOperator *E) {
assert(E->getSubExpr()->getType()->isAnyComplexType() &&
"lvalue __imag__ on scalar?");
if (!Visit(E->getSubExpr()))
return false;
HandleLValueComplexElement(Info, E, Result, E->getType(), true);
return true;
}
bool LValueExprEvaluator::VisitUnaryPreIncDec(const UnaryOperator *UO) {
if (!Info.getLangOpts().CPlusPlus1y && !Info.keepEvaluatingAfterFailure())
return Error(UO);
if (!this->Visit(UO->getSubExpr()))
return false;
return handleIncDec(
this->Info, UO, Result, UO->getSubExpr()->getType(),
UO->isIncrementOp(), 0);
}
bool LValueExprEvaluator::VisitCompoundAssignOperator(
const CompoundAssignOperator *CAO) {
if (!Info.getLangOpts().CPlusPlus1y && !Info.keepEvaluatingAfterFailure())
return Error(CAO);
APValue RHS;
// The overall lvalue result is the result of evaluating the LHS.
if (!this->Visit(CAO->getLHS())) {
if (Info.keepEvaluatingAfterFailure())
Evaluate(RHS, this->Info, CAO->getRHS());
return false;
}
if (!Evaluate(RHS, this->Info, CAO->getRHS()))
return false;
return handleCompoundAssignment(
this->Info, CAO,
Result, CAO->getLHS()->getType(), CAO->getComputationLHSType(),
CAO->getOpForCompoundAssignment(CAO->getOpcode()), RHS);
}
bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) {
if (!Info.getLangOpts().CPlusPlus1y && !Info.keepEvaluatingAfterFailure())
return Error(E);
APValue NewVal;
if (!this->Visit(E->getLHS())) {
if (Info.keepEvaluatingAfterFailure())
Evaluate(NewVal, this->Info, E->getRHS());
return false;
}
if (!Evaluate(NewVal, this->Info, E->getRHS()))
return false;
return handleAssignment(this->Info, E, Result, E->getLHS()->getType(),
NewVal);
}
//===----------------------------------------------------------------------===//
// Pointer Evaluation
//===----------------------------------------------------------------------===//
namespace {
class PointerExprEvaluator
: public ExprEvaluatorBase<PointerExprEvaluator, bool> {
LValue &Result;
bool Success(const Expr *E) {
Result.set(E);
return true;
}
public:
PointerExprEvaluator(EvalInfo &info, LValue &Result)
: ExprEvaluatorBaseTy(info), Result(Result) {}
bool Success(const APValue &V, const Expr *E) {
Result.setFrom(Info.Ctx, V);
return true;
}
bool ZeroInitialization(const Expr *E) {
return Success((Expr*)0);
}
bool VisitBinaryOperator(const BinaryOperator *E);
bool VisitCastExpr(const CastExpr* E);
bool VisitUnaryAddrOf(const UnaryOperator *E);
bool VisitObjCStringLiteral(const ObjCStringLiteral *E)
{ return Success(E); }
bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E)
{ return Success(E); }
bool VisitAddrLabelExpr(const AddrLabelExpr *E)
{ return Success(E); }
bool VisitCallExpr(const CallExpr *E);
bool VisitBlockExpr(const BlockExpr *E) {
if (!E->getBlockDecl()->hasCaptures())
return Success(E);
return Error(E);
}
bool VisitCXXThisExpr(const CXXThisExpr *E) {
// Can't look at 'this' when checking a potential constant expression.
if (Info.checkingPotentialConstantExpression())
return false;
if (!Info.CurrentCall->This)
return Error(E);
Result = *Info.CurrentCall->This;
return true;
}
// FIXME: Missing: @protocol, @selector
};
} // end anonymous namespace
static bool EvaluatePointer(const Expr* E, LValue& Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->hasPointerRepresentation());
return PointerExprEvaluator(Info, Result).Visit(E);
}
bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
if (E->getOpcode() != BO_Add &&
E->getOpcode() != BO_Sub)
return ExprEvaluatorBaseTy::VisitBinaryOperator(E);
const Expr *PExp = E->getLHS();
const Expr *IExp = E->getRHS();
if (IExp->getType()->isPointerType())
std::swap(PExp, IExp);
bool EvalPtrOK = EvaluatePointer(PExp, Result, Info);
if (!EvalPtrOK && !Info.keepEvaluatingAfterFailure())
return false;
llvm::APSInt Offset;
if (!EvaluateInteger(IExp, Offset, Info) || !EvalPtrOK)
return false;
int64_t AdditionalOffset = getExtValue(Offset);
if (E->getOpcode() == BO_Sub)
AdditionalOffset = -AdditionalOffset;
QualType Pointee = PExp->getType()->castAs<PointerType>()->getPointeeType();
return HandleLValueArrayAdjustment(Info, E, Result, Pointee,
AdditionalOffset);
}
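// Illustrative sketch (editor's comment, not part of the original source):
// the BO_Add/BO_Sub handling above implements constant pointer arithmetic:
//
//   constexpr int arr[4] = {0, 1, 2, 3};
//   constexpr const int *p = arr + 1;  // EvaluatePointer, then adjust
//   static_assert(*(p + 2) == 3, "");  // offset folded via getExtValue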
bool PointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) {
return EvaluateLValue(E->getSubExpr(), Result, Info);
}
bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) {
const Expr* SubExpr = E->getSubExpr();
switch (E->getCastKind()) {
default:
break;
case CK_BitCast:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_AnyPointerToBlockPointerCast:
if (!Visit(SubExpr))
return false;
// Bitcasts to cv void* are static_casts, not reinterpret_casts, so are
// permitted in constant expressions in C++11. Bitcasts from cv void* are
// also static_casts, but we disallow them as a resolution to DR1312.
if (!E->getType()->isVoidPointerType()) {
Result.Designator.setInvalid();
if (SubExpr->getType()->isVoidPointerType())
CCEDiag(E, diag::note_constexpr_invalid_cast)
<< 3 << SubExpr->getType();
else
CCEDiag(E, diag::note_constexpr_invalid_cast) << 2;
}
return true;
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase:
if (!EvaluatePointer(E->getSubExpr(), Result, Info))
return false;
if (!Result.Base && Result.Offset.isZero())
return true;
// Now figure out the necessary offset to add to the base LV to get from
// the derived class to the base class.
return HandleLValueBasePath(Info, E, E->getSubExpr()->getType()->
castAs<PointerType>()->getPointeeType(),
Result);
case CK_BaseToDerived:
if (!Visit(E->getSubExpr()))
return false;
if (!Result.Base && Result.Offset.isZero())
return true;
return HandleBaseToDerivedCast(Info, E, Result);
case CK_NullToPointer:
VisitIgnoredValue(E->getSubExpr());
return ZeroInitialization(E);
case CK_IntegralToPointer: {
CCEDiag(E, diag::note_constexpr_invalid_cast) << 2;
APValue Value;
if (!EvaluateIntegerOrLValue(SubExpr, Value, Info))
break;
if (Value.isInt()) {
unsigned Size = Info.Ctx.getTypeSize(E->getType());
uint64_t N = Value.getInt().extOrTrunc(Size).getZExtValue();
Result.Base = (Expr*)0;
Result.Offset = CharUnits::fromQuantity(N);
Result.CallIndex = 0;
Result.Designator.setInvalid();
return true;
} else {
// Cast is of an lvalue, no need to change value.
Result.setFrom(Info.Ctx, Value);
return true;
}
}
case CK_ArrayToPointerDecay:
if (SubExpr->isGLValue()) {
if (!EvaluateLValue(SubExpr, Result, Info))
return false;
} else {
Result.set(SubExpr, Info.CurrentCall->Index);
if (!EvaluateInPlace(Info.CurrentCall->createTemporary(SubExpr, false),
Info, Result, SubExpr))
return false;
}
// The result is a pointer to the first element of the array.
if (const ConstantArrayType *CAT
= Info.Ctx.getAsConstantArrayType(SubExpr->getType()))
Result.addArray(Info, E, CAT);
else
Result.Designator.setInvalid();
return true;
case CK_FunctionToPointerDecay:
return EvaluateLValue(SubExpr, Result, Info);
}
return ExprEvaluatorBaseTy::VisitCastExpr(E);
}
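// Illustrative sketch (editor's comment, not part of the original source):
// the derived/base cast cases above cover constant expressions such as:
//
//   struct B { int x; };
//   struct D : B {};
//   constexpr D d {};
//   constexpr const B *pb = &d;                         // DerivedToBase
//   constexpr const D *pd = static_cast<const D*>(pb);  // BaseToDerived
//   static_assert(pd == &d, "");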
bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (IsStringLiteralCall(E))
return Success(E);
switch (E->isBuiltinCall()) {
case Builtin::BI__builtin_addressof:
return EvaluateLValue(E->getArg(0), Result, Info);
default:
return ExprEvaluatorBaseTy::VisitCallExpr(E);
}
}
//===----------------------------------------------------------------------===//
// Member Pointer Evaluation
//===----------------------------------------------------------------------===//
namespace {
class MemberPointerExprEvaluator
: public ExprEvaluatorBase<MemberPointerExprEvaluator, bool> {
MemberPtr &Result;
bool Success(const ValueDecl *D) {
Result = MemberPtr(D);
return true;
}
public:
MemberPointerExprEvaluator(EvalInfo &Info, MemberPtr &Result)
: ExprEvaluatorBaseTy(Info), Result(Result) {}
bool Success(const APValue &V, const Expr *E) {
Result.setFrom(V);
return true;
}
bool ZeroInitialization(const Expr *E) {
return Success((const ValueDecl*)0);
}
bool VisitCastExpr(const CastExpr *E);
bool VisitUnaryAddrOf(const UnaryOperator *E);
};
} // end anonymous namespace
static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result,
EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isMemberPointerType());
return MemberPointerExprEvaluator(Info, Result).Visit(E);
}
bool MemberPointerExprEvaluator::VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_NullToMemberPointer:
VisitIgnoredValue(E->getSubExpr());
return ZeroInitialization(E);
case CK_BaseToDerivedMemberPointer: {
if (!Visit(E->getSubExpr()))
return false;
if (E->path_empty())
return true;
// Base-to-derived member pointer casts store the path in derived-to-base
// order, so iterate backwards. The CXXBaseSpecifier also provides us with
// the wrong end of the derived->base arc, so stagger the path by one class.
typedef std::reverse_iterator<CastExpr::path_const_iterator> ReverseIter;
for (ReverseIter PathI(E->path_end() - 1), PathE(E->path_begin());
PathI != PathE; ++PathI) {
assert(!(*PathI)->isVirtual() && "memptr cast through vbase");
const CXXRecordDecl *Derived = (*PathI)->getType()->getAsCXXRecordDecl();
if (!Result.castToDerived(Derived))
return Error(E);
}
const Type *FinalTy = E->getType()->castAs<MemberPointerType>()->getClass();
if (!Result.castToDerived(FinalTy->getAsCXXRecordDecl()))
return Error(E);
return true;
}
case CK_DerivedToBaseMemberPointer:
if (!Visit(E->getSubExpr()))
return false;
for (CastExpr::path_const_iterator PathI = E->path_begin(),
PathE = E->path_end(); PathI != PathE; ++PathI) {
assert(!(*PathI)->isVirtual() && "memptr cast through vbase");
const CXXRecordDecl *Base = (*PathI)->getType()->getAsCXXRecordDecl();
if (!Result.castToBase(Base))
return Error(E);
}
return true;
}
}
bool MemberPointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) {
// C++11 [expr.unary.op]p3 has very strict rules on how the address of a
// member can be formed.
return Success(cast<DeclRefExpr>(E->getSubExpr())->getDecl());
}
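// Illustrative sketch (editor's comment, not part of the original source):
// per the strict rules noted above, only the &Class::member form is
// accepted here, e.g.:
//
//   struct S { int m; };
//   constexpr int S::*pm = &S::m;    // VisitUnaryAddrOf
//   constexpr S s = {42};
//   static_assert(s.*pm == 42, "");  // .* handled via member pointer access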
//===----------------------------------------------------------------------===//
// Record Evaluation
//===----------------------------------------------------------------------===//
namespace {
class RecordExprEvaluator
: public ExprEvaluatorBase<RecordExprEvaluator, bool> {
const LValue &This;
APValue &Result;
public:
RecordExprEvaluator(EvalInfo &info, const LValue &This, APValue &Result)
: ExprEvaluatorBaseTy(info), This(This), Result(Result) {}
bool Success(const APValue &V, const Expr *E) {
Result = V;
return true;
}
bool ZeroInitialization(const Expr *E);
bool VisitCastExpr(const CastExpr *E);
bool VisitInitListExpr(const InitListExpr *E);
bool VisitCXXConstructExpr(const CXXConstructExpr *E);
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
};
}
/// Perform zero-initialization on an object of non-union class type.
/// C++11 [dcl.init]p5:
/// To zero-initialize an object or reference of type T means:
/// [...]
/// -- if T is a (possibly cv-qualified) non-union class type,
/// each non-static data member and each base-class subobject is
/// zero-initialized
static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E,
const RecordDecl *RD,
const LValue &This, APValue &Result) {
assert(!RD->isUnion() && "Expected non-union class type");
const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD);
Result = APValue(APValue::UninitStruct(), CD ? CD->getNumBases() : 0,
std::distance(RD->field_begin(), RD->field_end()));
if (RD->isInvalidDecl()) return false;
const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD);
if (CD) {
unsigned Index = 0;
for (CXXRecordDecl::base_class_const_iterator I = CD->bases_begin(),
End = CD->bases_end(); I != End; ++I, ++Index) {
const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl();
LValue Subobject = This;
if (!HandleLValueDirectBase(Info, E, Subobject, CD, Base, &Layout))
return false;
if (!HandleClassZeroInitialization(Info, E, Base, Subobject,
Result.getStructBase(Index)))
return false;
}
}
for (RecordDecl::field_iterator I = RD->field_begin(), End = RD->field_end();
I != End; ++I) {
// -- if T is a reference type, no initialization is performed.
if (I->getType()->isReferenceType())
continue;
LValue Subobject = This;
if (!HandleLValueMember(Info, E, Subobject, *I, &Layout))
return false;
ImplicitValueInitExpr VIE(I->getType());
if (!EvaluateInPlace(
Result.getStructField(I->getFieldIndex()), Info, Subobject, &VIE))
return false;
}
return true;
}
bool RecordExprEvaluator::ZeroInitialization(const Expr *E) {
const RecordDecl *RD = E->getType()->castAs<RecordType>()->getDecl();
if (RD->isInvalidDecl()) return false;
if (RD->isUnion()) {
// C++11 [dcl.init]p5: If T is a (possibly cv-qualified) union type, the
// object's first non-static named data member is zero-initialized
RecordDecl::field_iterator I = RD->field_begin();
if (I == RD->field_end()) {
Result = APValue((const FieldDecl*)0);
return true;
}
LValue Subobject = This;
if (!HandleLValueMember(Info, E, Subobject, *I))
return false;
Result = APValue(*I);
ImplicitValueInitExpr VIE(I->getType());
return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, &VIE);
}
if (isa<CXXRecordDecl>(RD) && cast<CXXRecordDecl>(RD)->getNumVBases()) {
Info.Diag(E, diag::note_constexpr_virtual_base) << RD;
return false;
}
return HandleClassZeroInitialization(Info, E, RD, This, Result);
}
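// Illustrative sketch (editor's comment, not part of the original source):
// zero-initialization as performed above, for value-initialized records:
//
//   struct Z { int a; double b; };
//   constexpr Z z = Z();   // every member zero-initialized
//   static_assert(z.a == 0, "");
//
//   union U { int i; float f; };
//   constexpr U u = U();   // first named member (i) zeroed
//   static_assert(u.i == 0, "");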
bool RecordExprEvaluator::VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_ConstructorConversion:
return Visit(E->getSubExpr());
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase: {
APValue DerivedObject;
if (!Evaluate(DerivedObject, Info, E->getSubExpr()))
return false;
if (!DerivedObject.isStruct())
return Error(E->getSubExpr());
// Derived-to-base rvalue conversion: just slice off the derived part.
APValue *Value = &DerivedObject;
const CXXRecordDecl *RD = E->getSubExpr()->getType()->getAsCXXRecordDecl();
for (CastExpr::path_const_iterator PathI = E->path_begin(),
PathE = E->path_end(); PathI != PathE; ++PathI) {
assert(!(*PathI)->isVirtual() && "record rvalue with virtual base");
const CXXRecordDecl *Base = (*PathI)->getType()->getAsCXXRecordDecl();
Value = &Value->getStructBase(getBaseIndex(RD, Base));
RD = Base;
}
Result = *Value;
return true;
}
}
}
bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
const RecordDecl *RD = E->getType()->castAs<RecordType>()->getDecl();
if (RD->isInvalidDecl()) return false;
const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD);
if (RD->isUnion()) {
const FieldDecl *Field = E->getInitializedFieldInUnion();
Result = APValue(Field);
if (!Field)
return true;
// If the initializer list for a union does not contain any elements, the
// first element of the union is value-initialized.
// FIXME: The element should be initialized from an initializer list.
// Is this difference ever observable for initializer lists which
// we don't build?
ImplicitValueInitExpr VIE(Field->getType());
const Expr *InitExpr = E->getNumInits() ? E->getInit(0) : &VIE;
LValue Subobject = This;
if (!HandleLValueMember(Info, InitExpr, Subobject, Field, &Layout))
return false;
// Temporarily override This, in case there's a CXXDefaultInitExpr in here.
ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This,
isa<CXXDefaultInitExpr>(InitExpr));
return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, InitExpr);
}
assert((!isa<CXXRecordDecl>(RD) || !cast<CXXRecordDecl>(RD)->getNumBases()) &&
"initializer list for class with base classes");
Result = APValue(APValue::UninitStruct(), 0,
std::distance(RD->field_begin(), RD->field_end()));
unsigned ElementNo = 0;
bool Success = true;
for (RecordDecl::field_iterator Field = RD->field_begin(),
FieldEnd = RD->field_end(); Field != FieldEnd; ++Field) {
// Anonymous bit-fields are not considered members of the class for
// purposes of aggregate initialization.
if (Field->isUnnamedBitfield())
continue;
LValue Subobject = This;
bool HaveInit = ElementNo < E->getNumInits();
// FIXME: Diagnostics here should point to the end of the initializer
// list, not the start.
if (!HandleLValueMember(Info, HaveInit ? E->getInit(ElementNo) : E,
Subobject, *Field, &Layout))
return false;
// Perform an implicit value-initialization for members beyond the end of
// the initializer list.
ImplicitValueInitExpr VIE(HaveInit ? Info.Ctx.IntTy : Field->getType());
const Expr *Init = HaveInit ? E->getInit(ElementNo++) : &VIE;
// Temporarily override This, in case there's a CXXDefaultInitExpr in here.
ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This,
isa<CXXDefaultInitExpr>(Init));
APValue &FieldVal = Result.getStructField(Field->getFieldIndex());
if (!EvaluateInPlace(FieldVal, Info, Subobject, Init) ||
(Field->isBitField() && !truncateBitfieldValue(Info, Init,
FieldVal, *Field))) {
if (!Info.keepEvaluatingAfterFailure())
return false;
Success = false;
}
}
return Success;
}
bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
const CXXConstructorDecl *FD = E->getConstructor();
if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false;
bool ZeroInit = E->requiresZeroInitialization();
if (CheckTrivialDefaultConstructor(Info, E->getExprLoc(), FD, ZeroInit)) {
// If we've already performed zero-initialization, we're already done.
if (!Result.isUninit())
return true;
- if (ZeroInit)
- return ZeroInitialization(E);
-
- const CXXRecordDecl *RD = FD->getParent();
- if (RD->isUnion())
- Result = APValue((FieldDecl*)0);
- else
- Result = APValue(APValue::UninitStruct(), RD->getNumBases(),
- std::distance(RD->field_begin(), RD->field_end()));
- return true;
+ // We can get here in two different ways:
+ // 1) We're performing value-initialization, and should zero-initialize
+ // the object, or
+ // 2) We're performing default-initialization of an object with a trivial
+ // constexpr default constructor, in which case we should start the
+ // lifetimes of all the base subobjects (there can be no data member
+ // subobjects in this case) per [basic.life]p1.
+ // Either way, ZeroInitialization is appropriate.
+ return ZeroInitialization(E);
}
const FunctionDecl *Definition = 0;
FD->getBody(Definition);
if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition))
return false;
// Avoid materializing a temporary for an elidable copy/move constructor.
if (E->isElidable() && !ZeroInit)
if (const MaterializeTemporaryExpr *ME
= dyn_cast<MaterializeTemporaryExpr>(E->getArg(0)))
return Visit(ME->GetTemporaryExpr());
if (ZeroInit && !ZeroInitialization(E))
return false;
ArrayRef<const Expr *> Args(E->getArgs(), E->getNumArgs());
return HandleConstructorCall(E->getExprLoc(), This, Args,
cast<CXXConstructorDecl>(Definition), Info,
Result);
}
bool RecordExprEvaluator::VisitCXXStdInitializerListExpr(
const CXXStdInitializerListExpr *E) {
const ConstantArrayType *ArrayType =
Info.Ctx.getAsConstantArrayType(E->getSubExpr()->getType());
LValue Array;
if (!EvaluateLValue(E->getSubExpr(), Array, Info))
return false;
// Get a pointer to the first element of the array.
Array.addArray(Info, E, ArrayType);
// FIXME: Perform the checks on the field types in SemaInit.
RecordDecl *Record = E->getType()->castAs<RecordType>()->getDecl();
RecordDecl::field_iterator Field = Record->field_begin();
if (Field == Record->field_end())
return Error(E);
// Start pointer.
if (!Field->getType()->isPointerType() ||
!Info.Ctx.hasSameType(Field->getType()->getPointeeType(),
ArrayType->getElementType()))
return Error(E);
// FIXME: What if the initializer_list type has base classes, etc?
Result = APValue(APValue::UninitStruct(), 0, 2);
Array.moveInto(Result.getStructField(0));
if (++Field == Record->field_end())
return Error(E);
if (Field->getType()->isPointerType() &&
Info.Ctx.hasSameType(Field->getType()->getPointeeType(),
ArrayType->getElementType())) {
// End pointer.
if (!HandleLValueArrayAdjustment(Info, E, Array,
ArrayType->getElementType(),
ArrayType->getSize().getZExtValue()))
return false;
Array.moveInto(Result.getStructField(1));
} else if (Info.Ctx.hasSameType(Field->getType(), Info.Ctx.getSizeType()))
// Length.
Result.getStructField(1) = APValue(APSInt(ArrayType->getSize()));
else
return Error(E);
if (++Field != Record->field_end())
return Error(E);
return true;
}
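// Illustrative sketch (editor's comment, not part of the original source):
// this evaluates the backing array of a std::initializer_list, assuming
// <initializer_list> has been included:
//
//   constexpr std::initializer_list<int> il = {1, 2, 3};
//
// Result then holds the start pointer plus either an end pointer or a
// length, per the field-layout checks above.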
static bool EvaluateRecord(const Expr *E, const LValue &This,
APValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isRecordType() &&
"can't evaluate expression as a record rvalue");
return RecordExprEvaluator(Info, This, Result).Visit(E);
}
//===----------------------------------------------------------------------===//
// Temporary Evaluation
//
// Temporaries are represented in the AST as rvalues, but generally behave like
// lvalues. The full-object of which the temporary is a subobject is implicitly
// materialized so that a reference can bind to it.
//===----------------------------------------------------------------------===//
namespace {
class TemporaryExprEvaluator
: public LValueExprEvaluatorBase<TemporaryExprEvaluator> {
public:
TemporaryExprEvaluator(EvalInfo &Info, LValue &Result) :
LValueExprEvaluatorBaseTy(Info, Result) {}
/// Visit an expression which constructs the value of this temporary.
bool VisitConstructExpr(const Expr *E) {
Result.set(E, Info.CurrentCall->Index);
return EvaluateInPlace(Info.CurrentCall->createTemporary(E, false),
Info, Result, E);
}
bool VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return LValueExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_ConstructorConversion:
return VisitConstructExpr(E->getSubExpr());
}
}
bool VisitInitListExpr(const InitListExpr *E) {
return VisitConstructExpr(E);
}
bool VisitCXXConstructExpr(const CXXConstructExpr *E) {
return VisitConstructExpr(E);
}
bool VisitCallExpr(const CallExpr *E) {
return VisitConstructExpr(E);
}
};
} // end anonymous namespace
/// Evaluate an expression of record type as a temporary.
static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isRecordType());
return TemporaryExprEvaluator(Info, Result).Visit(E);
}
//===----------------------------------------------------------------------===//
// Vector Evaluation
//===----------------------------------------------------------------------===//
namespace {
class VectorExprEvaluator
: public ExprEvaluatorBase<VectorExprEvaluator, bool> {
APValue &Result;
public:
VectorExprEvaluator(EvalInfo &info, APValue &Result)
: ExprEvaluatorBaseTy(info), Result(Result) {}
bool Success(const ArrayRef<APValue> &V, const Expr *E) {
assert(V.size() == E->getType()->castAs<VectorType>()->getNumElements());
// FIXME: remove this APValue copy.
Result = APValue(V.data(), V.size());
return true;
}
bool Success(const APValue &V, const Expr *E) {
assert(V.isVector());
Result = V;
return true;
}
bool ZeroInitialization(const Expr *E);
bool VisitUnaryReal(const UnaryOperator *E)
{ return Visit(E->getSubExpr()); }
bool VisitCastExpr(const CastExpr* E);
bool VisitInitListExpr(const InitListExpr *E);
bool VisitUnaryImag(const UnaryOperator *E);
// FIXME: Missing: unary -, unary ~, binary add/sub/mul/div,
// binary comparisons, binary and/or/xor,
// shufflevector, ExtVectorElementExpr
};
} // end anonymous namespace
static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isVectorType() && "not a vector rvalue");
return VectorExprEvaluator(Info, Result).Visit(E);
}
bool VectorExprEvaluator::VisitCastExpr(const CastExpr* E) {
const VectorType *VTy = E->getType()->castAs<VectorType>();
unsigned NElts = VTy->getNumElements();
const Expr *SE = E->getSubExpr();
QualType SETy = SE->getType();
switch (E->getCastKind()) {
case CK_VectorSplat: {
APValue Val = APValue();
if (SETy->isIntegerType()) {
APSInt IntResult;
if (!EvaluateInteger(SE, IntResult, Info))
return false;
Val = APValue(IntResult);
} else if (SETy->isRealFloatingType()) {
APFloat F(0.0);
if (!EvaluateFloat(SE, F, Info))
return false;
Val = APValue(F);
} else {
return Error(E);
}
// Splat and create vector APValue.
SmallVector<APValue, 4> Elts(NElts, Val);
return Success(Elts, E);
}
case CK_BitCast: {
// Evaluate the operand into an APInt we can extract from.
llvm::APInt SValInt;
if (!EvalAndBitcastToAPInt(Info, SE, SValInt))
return false;
// Extract the elements
QualType EltTy = VTy->getElementType();
unsigned EltSize = Info.Ctx.getTypeSize(EltTy);
bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
SmallVector<APValue, 4> Elts;
if (EltTy->isRealFloatingType()) {
const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(EltTy);
unsigned FloatEltSize = EltSize;
if (&Sem == &APFloat::x87DoubleExtended)
FloatEltSize = 80;
for (unsigned i = 0; i < NElts; i++) {
llvm::APInt Elt;
if (BigEndian)
Elt = SValInt.rotl(i*EltSize+FloatEltSize).trunc(FloatEltSize);
else
Elt = SValInt.rotr(i*EltSize).trunc(FloatEltSize);
Elts.push_back(APValue(APFloat(Sem, Elt)));
}
} else if (EltTy->isIntegerType()) {
for (unsigned i = 0; i < NElts; i++) {
llvm::APInt Elt;
if (BigEndian)
Elt = SValInt.rotl(i*EltSize+EltSize).zextOrTrunc(EltSize);
else
Elt = SValInt.rotr(i*EltSize).zextOrTrunc(EltSize);
Elts.push_back(APValue(APSInt(Elt, EltTy->isSignedIntegerType())));
}
} else {
return Error(E);
}
return Success(Elts, E);
}
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
}
}
bool
VectorExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
const VectorType *VT = E->getType()->castAs<VectorType>();
unsigned NumInits = E->getNumInits();
unsigned NumElements = VT->getNumElements();
QualType EltTy = VT->getElementType();
SmallVector<APValue, 4> Elements;
// The number of initializers can be less than the number of
// vector elements. For OpenCL, this can be due to nested vector
// initialization. For GCC compatibility, missing trailing elements
// should be initialized with zeroes.
unsigned CountInits = 0, CountElts = 0;
while (CountElts < NumElements) {
// Handle nested vector initialization.
if (CountInits < NumInits
&& E->getInit(CountInits)->getType()->isVectorType()) {
APValue v;
if (!EvaluateVector(E->getInit(CountInits), v, Info))
return Error(E);
unsigned vlen = v.getVectorLength();
for (unsigned j = 0; j < vlen; j++)
Elements.push_back(v.getVectorElt(j));
CountElts += vlen;
} else if (EltTy->isIntegerType()) {
llvm::APSInt sInt(32);
if (CountInits < NumInits) {
if (!EvaluateInteger(E->getInit(CountInits), sInt, Info))
return false;
} else // trailing integer zero.
sInt = Info.Ctx.MakeIntValue(0, EltTy);
Elements.push_back(APValue(sInt));
CountElts++;
} else {
llvm::APFloat f(0.0);
if (CountInits < NumInits) {
if (!EvaluateFloat(E->getInit(CountInits), f, Info))
return false;
} else // trailing float zero.
f = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(EltTy));
Elements.push_back(APValue(f));
CountElts++;
}
CountInits++;
}
return Success(Elements, E);
}
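// Illustrative sketch (editor's comment, not part of the original source):
// with the GCC vector extension, missing trailing elements are zero-filled
// when folding an initializer, e.g.:
//
//   typedef int v4 __attribute__((vector_size(16)));
//   v4 g = {1, 2};   // folded as {1, 2, 0, 0} for the global's initializer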
bool
VectorExprEvaluator::ZeroInitialization(const Expr *E) {
const VectorType *VT = E->getType()->getAs<VectorType>();
QualType EltTy = VT->getElementType();
APValue ZeroElement;
if (EltTy->isIntegerType())
ZeroElement = APValue(Info.Ctx.MakeIntValue(0, EltTy));
else
ZeroElement =
APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(EltTy)));
SmallVector<APValue, 4> Elements(VT->getNumElements(), ZeroElement);
return Success(Elements, E);
}
bool VectorExprEvaluator::VisitUnaryImag(const UnaryOperator *E) {
VisitIgnoredValue(E->getSubExpr());
return ZeroInitialization(E);
}
//===----------------------------------------------------------------------===//
// Array Evaluation
//===----------------------------------------------------------------------===//
namespace {
class ArrayExprEvaluator
: public ExprEvaluatorBase<ArrayExprEvaluator, bool> {
const LValue &This;
APValue &Result;
public:
ArrayExprEvaluator(EvalInfo &Info, const LValue &This, APValue &Result)
: ExprEvaluatorBaseTy(Info), This(This), Result(Result) {}
bool Success(const APValue &V, const Expr *E) {
assert((V.isArray() || V.isLValue()) &&
"expected array or string literal");
Result = V;
return true;
}
bool ZeroInitialization(const Expr *E) {
const ConstantArrayType *CAT =
Info.Ctx.getAsConstantArrayType(E->getType());
if (!CAT)
return Error(E);
Result = APValue(APValue::UninitArray(), 0,
CAT->getSize().getZExtValue());
if (!Result.hasArrayFiller()) return true;
// Zero-initialize all elements.
LValue Subobject = This;
Subobject.addArray(Info, E, CAT);
ImplicitValueInitExpr VIE(CAT->getElementType());
return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE);
}
bool VisitInitListExpr(const InitListExpr *E);
bool VisitCXXConstructExpr(const CXXConstructExpr *E);
bool VisitCXXConstructExpr(const CXXConstructExpr *E,
const LValue &Subobject,
APValue *Value, QualType Type);
};
} // end anonymous namespace
static bool EvaluateArray(const Expr *E, const LValue &This,
APValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isArrayType() && "not an array rvalue");
return ArrayExprEvaluator(Info, This, Result).Visit(E);
}
bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType());
if (!CAT)
return Error(E);
// C++11 [dcl.init.string]p1: A char array [...] can be initialized by [...]
// an appropriately-typed string literal enclosed in braces.
if (E->isStringLiteralInit()) {
LValue LV;
if (!EvaluateLValue(E->getInit(0), LV, Info))
return false;
APValue Val;
LV.moveInto(Val);
return Success(Val, E);
}
bool Success = true;
assert((!Result.isArray() || Result.getArrayInitializedElts() == 0) &&
"zero-initialized array shouldn't have any initialized elts");
APValue Filler;
if (Result.isArray() && Result.hasArrayFiller())
Filler = Result.getArrayFiller();
unsigned NumEltsToInit = E->getNumInits();
unsigned NumElts = CAT->getSize().getZExtValue();
const Expr *FillerExpr = E->hasArrayFiller() ? E->getArrayFiller() : 0;
// If the initializer might depend on the array index, run it for each
// array element. For now, just whitelist non-class value-initialization.
if (NumEltsToInit != NumElts && !isa<ImplicitValueInitExpr>(FillerExpr))
NumEltsToInit = NumElts;
Result = APValue(APValue::UninitArray(), NumEltsToInit, NumElts);
// If the array was previously zero-initialized, preserve the
// zero-initialized values.
if (!Filler.isUninit()) {
for (unsigned I = 0, E = Result.getArrayInitializedElts(); I != E; ++I)
Result.getArrayInitializedElt(I) = Filler;
if (Result.hasArrayFiller())
Result.getArrayFiller() = Filler;
}
LValue Subobject = This;
Subobject.addArray(Info, E, CAT);
for (unsigned Index = 0; Index != NumEltsToInit; ++Index) {
const Expr *Init =
Index < E->getNumInits() ? E->getInit(Index) : FillerExpr;
if (!EvaluateInPlace(Result.getArrayInitializedElt(Index),
Info, Subobject, Init) ||
!HandleLValueArrayAdjustment(Info, Init, Subobject,
CAT->getElementType(), 1)) {
if (!Info.keepEvaluatingAfterFailure())
return false;
Success = false;
}
}
if (!Result.hasArrayFiller())
return Success;
// If we get here, we have a trivial filler, which we can just evaluate
// once and splat over the rest of the array elements.
assert(FillerExpr && "no array filler for incomplete init list");
return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject,
FillerExpr) && Success;
}
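// Illustrative sketch (editor's comment, not part of the original source):
// both special cases above in action:
//
//   constexpr char s[] = {"hi"};  // string literal enclosed in braces
//   constexpr int a[5] = {1, 2};  // elements 2..4 come from the filler
//   static_assert(a[4] == 0, "");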
bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
return VisitCXXConstructExpr(E, This, &Result, E->getType());
}
bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E,
const LValue &Subobject,
APValue *Value,
QualType Type) {
bool HadZeroInit = !Value->isUninit();
if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(Type)) {
unsigned N = CAT->getSize().getZExtValue();
// Preserve the array filler if we had prior zero-initialization.
APValue Filler =
HadZeroInit && Value->hasArrayFiller() ? Value->getArrayFiller()
: APValue();
*Value = APValue(APValue::UninitArray(), N, N);
if (HadZeroInit)
for (unsigned I = 0; I != N; ++I)
Value->getArrayInitializedElt(I) = Filler;
// Initialize the elements.
LValue ArrayElt = Subobject;
ArrayElt.addArray(Info, E, CAT);
for (unsigned I = 0; I != N; ++I)
if (!VisitCXXConstructExpr(E, ArrayElt, &Value->getArrayInitializedElt(I),
CAT->getElementType()) ||
!HandleLValueArrayAdjustment(Info, E, ArrayElt,
CAT->getElementType(), 1))
return false;
return true;
}
if (!Type->isRecordType())
return Error(E);
const CXXConstructorDecl *FD = E->getConstructor();
bool ZeroInit = E->requiresZeroInitialization();
if (CheckTrivialDefaultConstructor(Info, E->getExprLoc(), FD, ZeroInit)) {
if (HadZeroInit)
return true;
- if (ZeroInit) {
- ImplicitValueInitExpr VIE(Type);
- return EvaluateInPlace(*Value, Info, Subobject, &VIE);
- }
-
- const CXXRecordDecl *RD = FD->getParent();
- if (RD->isUnion())
- *Value = APValue((FieldDecl*)0);
- else
- *Value =
- APValue(APValue::UninitStruct(), RD->getNumBases(),
- std::distance(RD->field_begin(), RD->field_end()));
- return true;
+ // See RecordExprEvaluator::VisitCXXConstructExpr for explanation.
+ ImplicitValueInitExpr VIE(Type);
+ return EvaluateInPlace(*Value, Info, Subobject, &VIE);
}
const FunctionDecl *Definition = 0;
FD->getBody(Definition);
if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition))
return false;
if (ZeroInit && !HadZeroInit) {
ImplicitValueInitExpr VIE(Type);
if (!EvaluateInPlace(*Value, Info, Subobject, &VIE))
return false;
}
ArrayRef<const Expr *> Args(E->getArgs(), E->getNumArgs());
return HandleConstructorCall(E->getExprLoc(), Subobject, Args,
cast<CXXConstructorDecl>(Definition),
Info, *Value);
}
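// Illustrative sketch (editor's comment, not part of the original source):
// element-wise construction of a constant array, as implemented above:
//
//   struct S { int v; constexpr S() : v(7) {} };
//   constexpr S arr[3] = {};           // constructor run for each element
//   static_assert(arr[2].v == 7, "");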
//===----------------------------------------------------------------------===//
// Integer Evaluation
//
// As a GNU extension, we support casting pointers to sufficiently-wide integer
// types and back in constant folding. Integer values are thus represented
// either as an integer-valued APValue, or as an lvalue-valued APValue.
//===----------------------------------------------------------------------===//
namespace {
class IntExprEvaluator
: public ExprEvaluatorBase<IntExprEvaluator, bool> {
APValue &Result;
public:
IntExprEvaluator(EvalInfo &info, APValue &result)
: ExprEvaluatorBaseTy(info), Result(result) {}
bool Success(const llvm::APSInt &SI, const Expr *E, APValue &Result) {
assert(E->getType()->isIntegralOrEnumerationType() &&
"Invalid evaluation result.");
assert(SI.isSigned() == E->getType()->isSignedIntegerOrEnumerationType() &&
"Invalid evaluation result.");
assert(SI.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) &&
"Invalid evaluation result.");
Result = APValue(SI);
return true;
}
bool Success(const llvm::APSInt &SI, const Expr *E) {
return Success(SI, E, Result);
}
bool Success(const llvm::APInt &I, const Expr *E, APValue &Result) {
assert(E->getType()->isIntegralOrEnumerationType() &&
"Invalid evaluation result.");
assert(I.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) &&
"Invalid evaluation result.");
Result = APValue(APSInt(I));
Result.getInt().setIsUnsigned(
E->getType()->isUnsignedIntegerOrEnumerationType());
return true;
}
bool Success(const llvm::APInt &I, const Expr *E) {
return Success(I, E, Result);
}
bool Success(uint64_t Value, const Expr *E, APValue &Result) {
assert(E->getType()->isIntegralOrEnumerationType() &&
"Invalid evaluation result.");
Result = APValue(Info.Ctx.MakeIntValue(Value, E->getType()));
return true;
}
bool Success(uint64_t Value, const Expr *E) {
return Success(Value, E, Result);
}
bool Success(CharUnits Size, const Expr *E) {
return Success(Size.getQuantity(), E);
}
bool Success(const APValue &V, const Expr *E) {
if (V.isLValue() || V.isAddrLabelDiff()) {
Result = V;
return true;
}
return Success(V.getInt(), E);
}
bool ZeroInitialization(const Expr *E) { return Success(0, E); }
//===--------------------------------------------------------------------===//
// Visitor Methods
//===--------------------------------------------------------------------===//
bool VisitIntegerLiteral(const IntegerLiteral *E) {
return Success(E->getValue(), E);
}
bool VisitCharacterLiteral(const CharacterLiteral *E) {
return Success(E->getValue(), E);
}
bool CheckReferencedDecl(const Expr *E, const Decl *D);
bool VisitDeclRefExpr(const DeclRefExpr *E) {
if (CheckReferencedDecl(E, E->getDecl()))
return true;
return ExprEvaluatorBaseTy::VisitDeclRefExpr(E);
}
bool VisitMemberExpr(const MemberExpr *E) {
if (CheckReferencedDecl(E, E->getMemberDecl())) {
VisitIgnoredValue(E->getBase());
return true;
}
return ExprEvaluatorBaseTy::VisitMemberExpr(E);
}
bool VisitCallExpr(const CallExpr *E);
bool VisitBinaryOperator(const BinaryOperator *E);
bool VisitOffsetOfExpr(const OffsetOfExpr *E);
bool VisitUnaryOperator(const UnaryOperator *E);
bool VisitCastExpr(const CastExpr* E);
bool VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E);
bool VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E) {
return Success(E->getValue(), E);
}
bool VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E) {
return Success(E->getValue(), E);
}
// Note, GNU defines __null as an integer, not a pointer.
bool VisitGNUNullExpr(const GNUNullExpr *E) {
return ZeroInitialization(E);
}
bool VisitUnaryTypeTraitExpr(const UnaryTypeTraitExpr *E) {
return Success(E->getValue(), E);
}
bool VisitBinaryTypeTraitExpr(const BinaryTypeTraitExpr *E) {
return Success(E->getValue(), E);
}
bool VisitTypeTraitExpr(const TypeTraitExpr *E) {
return Success(E->getValue(), E);
}
bool VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) {
return Success(E->getValue(), E);
}
bool VisitExpressionTraitExpr(const ExpressionTraitExpr *E) {
return Success(E->getValue(), E);
}
bool VisitUnaryReal(const UnaryOperator *E);
bool VisitUnaryImag(const UnaryOperator *E);
bool VisitCXXNoexceptExpr(const CXXNoexceptExpr *E);
bool VisitSizeOfPackExpr(const SizeOfPackExpr *E);
private:
CharUnits GetAlignOfExpr(const Expr *E);
CharUnits GetAlignOfType(QualType T);
static QualType GetObjectType(APValue::LValueBase B);
bool TryEvaluateBuiltinObjectSize(const CallExpr *E);
// FIXME: Missing: array subscript of vector, member of vector
};
} // end anonymous namespace
/// EvaluateIntegerOrLValue - Evaluate an rvalue integral-typed expression, and
/// produce either the integer value or a pointer.
///
/// GCC has a heinous extension which folds casts between pointer types and
/// pointer-sized integral types. We support this by allowing the evaluation of
/// an integer rvalue to produce a pointer (represented as an lvalue) instead.
/// Some simple arithmetic on such values is supported (they are treated much
/// like char*).
static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result,
EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isIntegralOrEnumerationType());
return IntExprEvaluator(Info, Result).Visit(E);
}
static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info) {
APValue Val;
if (!EvaluateIntegerOrLValue(E, Val, Info))
return false;
if (!Val.isInt()) {
// FIXME: It would be better to produce the diagnostic for casting
// a pointer to an integer.
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
Result = Val.getInt();
return true;
}
/// Check whether the given declaration can be directly converted to an integral
/// rvalue. If not, no diagnostic is produced; there are other things we can
/// try.
bool IntExprEvaluator::CheckReferencedDecl(const Expr* E, const Decl* D) {
// Enums are integer constant exprs.
if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(D)) {
// Check for signedness/width mismatches between E type and ECD value.
bool SameSign = (ECD->getInitVal().isSigned()
== E->getType()->isSignedIntegerOrEnumerationType());
bool SameWidth = (ECD->getInitVal().getBitWidth()
== Info.Ctx.getIntWidth(E->getType()));
if (SameSign && SameWidth)
return Success(ECD->getInitVal(), E);
else {
// Get rid of mismatch (otherwise Success assertions will fail)
// by computing a new value matching the type of E.
llvm::APSInt Val = ECD->getInitVal();
if (!SameSign)
Val.setIsSigned(!ECD->getInitVal().isSigned());
if (!SameWidth)
Val = Val.extOrTrunc(Info.Ctx.getIntWidth(E->getType()));
return Success(Val, E);
}
}
return false;
}
/// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way
/// as GCC.
static int EvaluateBuiltinClassifyType(const CallExpr *E) {
// The following enum mimics the values returned by GCC.
// FIXME: Does GCC differ between lvalue and rvalue references here?
enum gcc_type_class {
no_type_class = -1,
void_type_class, integer_type_class, char_type_class,
enumeral_type_class, boolean_type_class,
pointer_type_class, reference_type_class, offset_type_class,
real_type_class, complex_type_class,
function_type_class, method_type_class,
record_type_class, union_type_class,
array_type_class, string_type_class,
lang_type_class
};
// If no argument was supplied, default to "no_type_class". This isn't
// ideal; however, it is what gcc does.
if (E->getNumArgs() == 0)
return no_type_class;
QualType ArgTy = E->getArg(0)->getType();
if (ArgTy->isVoidType())
return void_type_class;
else if (ArgTy->isEnumeralType())
return enumeral_type_class;
else if (ArgTy->isBooleanType())
return boolean_type_class;
else if (ArgTy->isCharType())
return string_type_class; // gcc doesn't appear to use char_type_class
else if (ArgTy->isIntegerType())
return integer_type_class;
else if (ArgTy->isPointerType())
return pointer_type_class;
else if (ArgTy->isReferenceType())
return reference_type_class;
else if (ArgTy->isRealType())
return real_type_class;
else if (ArgTy->isComplexType())
return complex_type_class;
else if (ArgTy->isFunctionType())
return function_type_class;
else if (ArgTy->isStructureOrClassType())
return record_type_class;
else if (ArgTy->isUnionType())
return union_type_class;
else if (ArgTy->isArrayType())
return array_type_class;
else // FIXME: offset_type_class, method_type_class, & lang_type_class?
llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
}
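// Illustrative sketch (editor's comment, not part of the original source):
// with the enum values above, integer_type_class == 1 and
// pointer_type_class == 5, so for example:
//
//   static_assert(__builtin_classify_type(0) == 1, "");
//   static_assert(__builtin_classify_type((int*)0) == 5, "");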
/// EvaluateBuiltinConstantPForLValue - Determine the result of
/// __builtin_constant_p when applied to the given lvalue.
///
/// An lvalue is only "constant" if it is a pointer or reference to the first
/// character of a string literal.
template<typename LValue>
static bool EvaluateBuiltinConstantPForLValue(const LValue &LV) {
const Expr *E = LV.getLValueBase().template dyn_cast<const Expr*>();
return E && isa<StringLiteral>(E) && LV.getLValueOffset().isZero();
}
/// EvaluateBuiltinConstantP - Evaluate __builtin_constant_p as similarly to
/// GCC as we can manage.
static bool EvaluateBuiltinConstantP(ASTContext &Ctx, const Expr *Arg) {
QualType ArgType = Arg->getType();
// __builtin_constant_p always has one operand. The rules which gcc follows
// are not precisely documented, but are as follows:
//
// - If the operand is of integral, floating, complex or enumeration type,
// and can be folded to a known value of that type, it returns 1.
// - If the operand can be folded to a pointer to the first character
// of a string literal (or such a pointer cast to an integral type), it
// returns 1.
//
// Otherwise, it returns 0.
//
// FIXME: GCC also intends to return 1 for literals of aggregate types, but
// its support for this does not currently work.
if (ArgType->isIntegralOrEnumerationType()) {
Expr::EvalResult Result;
if (!Arg->EvaluateAsRValue(Result, Ctx) || Result.HasSideEffects)
return false;
APValue &V = Result.Val;
if (V.getKind() == APValue::Int)
return true;
return EvaluateBuiltinConstantPForLValue(V);
} else if (ArgType->isFloatingType() || ArgType->isAnyComplexType()) {
return Arg->isEvaluatable(Ctx);
} else if (ArgType->isPointerType() || Arg->isGLValue()) {
LValue LV;
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
if ((Arg->isGLValue() ? EvaluateLValue(Arg, LV, Info)
: EvaluatePointer(Arg, LV, Info)) &&
!Status.HasSideEffects)
return EvaluateBuiltinConstantPForLValue(LV);
}
// Anything else isn't considered to be sufficiently constant.
return false;
}
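// Illustrative sketch (editor's comment, not part of the original source):
// consequences of the rules above:
//
//   static_assert(__builtin_constant_p(3 + 4), "");  // foldable integer
//   static_assert(__builtin_constant_p("abc"), "");  // &string[0] is OK
//   // a pointer into the middle of a literal is not "constant":
//   static_assert(!__builtin_constant_p("abc" + 1), "");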
/// Retrieves the "underlying object type" of the given expression,
/// as used by __builtin_object_size.
QualType IntExprEvaluator::GetObjectType(APValue::LValueBase B) {
if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>()) {
if (const VarDecl *VD = dyn_cast<VarDecl>(D))
return VD->getType();
} else if (const Expr *E = B.get<const Expr*>()) {
if (isa<CompoundLiteralExpr>(E))
return E->getType();
}
return QualType();
}
bool IntExprEvaluator::TryEvaluateBuiltinObjectSize(const CallExpr *E) {
LValue Base;
{
// The operand of __builtin_object_size is never evaluated for side-effects.
// If there are any, but we can determine the pointed-to object anyway, then
// ignore the side-effects.
SpeculativeEvaluationRAII SpeculativeEval(Info);
if (!EvaluatePointer(E->getArg(0), Base, Info))
return false;
}
// If we can prove the base is null, lower to zero now.
if (!Base.getLValueBase()) return Success(0, E);
QualType T = GetObjectType(Base.getLValueBase());
if (T.isNull() ||
T->isIncompleteType() ||
T->isFunctionType() ||
T->isVariablyModifiedType() ||
T->isDependentType())
return Error(E);
CharUnits Size = Info.Ctx.getTypeSizeInChars(T);
CharUnits Offset = Base.getLValueOffset();
if (!Offset.isNegative() && Offset <= Size)
Size -= Offset;
else
Size = CharUnits::Zero();
return Success(Size, E);
}
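// Illustrative sketch (editor's comment, not part of the original source):
// the offset is subtracted from the object size when it lies inside the
// object:
//
//   char buf[16];
//   unsigned n = __builtin_object_size(buf + 4, 0);  // folds to 12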
bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) {
switch (unsigned BuiltinOp = E->isBuiltinCall()) {
default:
return ExprEvaluatorBaseTy::VisitCallExpr(E);
case Builtin::BI__builtin_object_size: {
if (TryEvaluateBuiltinObjectSize(E))
return true;
// If evaluating the argument has side-effects, we can't determine the size
// of the object, and so we lower it to unknown now. CodeGen relies on us to
// handle all cases where the expression has side-effects.
if (E->getArg(0)->HasSideEffects(Info.Ctx)) {
if (E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue() <= 1)
return Success(-1ULL, E);
return Success(0, E);
}
// Expression had no side effects, but we couldn't statically determine the
// size of the referenced object.
return Error(E);
}
case Builtin::BI__builtin_bswap16:
case Builtin::BI__builtin_bswap32:
case Builtin::BI__builtin_bswap64: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
return Success(Val.byteSwap(), E);
}
case Builtin::BI__builtin_classify_type:
return Success(EvaluateBuiltinClassifyType(E), E);
// FIXME: BI__builtin_clrsb
// FIXME: BI__builtin_clrsbl
// FIXME: BI__builtin_clrsbll
case Builtin::BI__builtin_clz:
case Builtin::BI__builtin_clzl:
case Builtin::BI__builtin_clzll: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
if (!Val)
return Error(E);
return Success(Val.countLeadingZeros(), E);
}
case Builtin::BI__builtin_constant_p:
return Success(EvaluateBuiltinConstantP(Info.Ctx, E->getArg(0)), E);
case Builtin::BI__builtin_ctz:
case Builtin::BI__builtin_ctzl:
case Builtin::BI__builtin_ctzll: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
if (!Val)
return Error(E);
return Success(Val.countTrailingZeros(), E);
}
case Builtin::BI__builtin_eh_return_data_regno: {
int Operand = E->getArg(0)->EvaluateKnownConstInt(Info.Ctx).getZExtValue();
Operand = Info.Ctx.getTargetInfo().getEHDataRegisterNumber(Operand);
return Success(Operand, E);
}
case Builtin::BI__builtin_expect:
return Visit(E->getArg(0));
case Builtin::BI__builtin_ffs:
case Builtin::BI__builtin_ffsl:
case Builtin::BI__builtin_ffsll: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
unsigned N = Val.countTrailingZeros();
return Success(N == Val.getBitWidth() ? 0 : N + 1, E);
}
case Builtin::BI__builtin_fpclassify: {
APFloat Val(0.0);
if (!EvaluateFloat(E->getArg(5), Val, Info))
return false;
unsigned Arg;
switch (Val.getCategory()) {
case APFloat::fcNaN: Arg = 0; break;
case APFloat::fcInfinity: Arg = 1; break;
case APFloat::fcNormal: Arg = Val.isDenormal() ? 3 : 2; break;
case APFloat::fcZero: Arg = 4; break;
}
return Visit(E->getArg(Arg));
}
case Builtin::BI__builtin_isinf_sign: {
APFloat Val(0.0);
return EvaluateFloat(E->getArg(0), Val, Info) &&
Success(Val.isInfinity() ? (Val.isNegative() ? -1 : 1) : 0, E);
}
case Builtin::BI__builtin_isinf: {
APFloat Val(0.0);
return EvaluateFloat(E->getArg(0), Val, Info) &&
Success(Val.isInfinity() ? 1 : 0, E);
}
case Builtin::BI__builtin_isfinite: {
APFloat Val(0.0);
return EvaluateFloat(E->getArg(0), Val, Info) &&
Success(Val.isFinite() ? 1 : 0, E);
}
case Builtin::BI__builtin_isnan: {
APFloat Val(0.0);
return EvaluateFloat(E->getArg(0), Val, Info) &&
Success(Val.isNaN() ? 1 : 0, E);
}
case Builtin::BI__builtin_isnormal: {
APFloat Val(0.0);
return EvaluateFloat(E->getArg(0), Val, Info) &&
Success(Val.isNormal() ? 1 : 0, E);
}
case Builtin::BI__builtin_parity:
case Builtin::BI__builtin_parityl:
case Builtin::BI__builtin_parityll: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
return Success(Val.countPopulation() % 2, E);
}
case Builtin::BI__builtin_popcount:
case Builtin::BI__builtin_popcountl:
case Builtin::BI__builtin_popcountll: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
return Success(Val.countPopulation(), E);
}
case Builtin::BIstrlen:
// A call to strlen is not a constant expression.
if (Info.getLangOpts().CPlusPlus11)
Info.CCEDiag(E, diag::note_constexpr_invalid_function)
<< /*isConstexpr*/0 << /*isConstructor*/0 << "'strlen'";
else
Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
// Fall through.
case Builtin::BI__builtin_strlen: {
// As an extension, we support __builtin_strlen() as a constant expression,
// and support folding strlen() to a constant.
LValue String;
if (!EvaluatePointer(E->getArg(0), String, Info))
return false;
// Fast path: if it's a string literal, search the string value.
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
String.getLValueBase().dyn_cast<const Expr *>())) {
// The string literal may have embedded null characters. Find the first
// one and truncate there.
StringRef Str = S->getBytes();
int64_t Off = String.Offset.getQuantity();
if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
S->getCharByteWidth() == 1) {
Str = Str.substr(Off);
StringRef::size_type Pos = Str.find(0);
if (Pos != StringRef::npos)
Str = Str.substr(0, Pos);
return Success(Str.size(), E);
}
// Fall through to slow path to issue appropriate diagnostic.
}
// Slow path: scan the bytes of the string looking for the terminating 0.
QualType CharTy = E->getArg(0)->getType()->getPointeeType();
for (uint64_t Strlen = 0; /**/; ++Strlen) {
APValue Char;
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
!Char.isInt())
return false;
if (!Char.getInt())
return Success(Strlen, E);
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
return false;
}
}
case Builtin::BI__atomic_always_lock_free:
case Builtin::BI__atomic_is_lock_free:
case Builtin::BI__c11_atomic_is_lock_free: {
APSInt SizeVal;
if (!EvaluateInteger(E->getArg(0), SizeVal, Info))
return false;
// For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
// of two less than the maximum inline atomic width, we know it is
// lock-free. If the size isn't a power of two, or greater than the
// maximum alignment where we promote atomics, we know it is not lock-free
// (at least not in the sense of atomic_is_lock_free). Otherwise,
// the answer can only be determined at runtime; for example, 16-byte
// atomics have lock-free implementations on some, but not all,
// x86-64 processors.
// Check power-of-two.
CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue());
if (Size.isPowerOfTwo()) {
// Check against inlining width.
unsigned InlineWidthBits =
Info.Ctx.getTargetInfo().getMaxAtomicInlineWidth();
if (Size <= Info.Ctx.toCharUnitsFromBits(InlineWidthBits)) {
if (BuiltinOp == Builtin::BI__c11_atomic_is_lock_free ||
Size == CharUnits::One() ||
E->getArg(1)->isNullPointerConstant(Info.Ctx,
Expr::NPC_NeverValueDependent))
// OK, we will inline appropriately-aligned operations of this size,
// and _Atomic(T) is appropriately-aligned.
return Success(1, E);
QualType PointeeType = E->getArg(1)->IgnoreImpCasts()->getType()->
castAs<PointerType>()->getPointeeType();
if (!PointeeType->isIncompleteType() &&
Info.Ctx.getTypeAlignInChars(PointeeType) >= Size) {
// OK, we will inline operations on this object.
return Success(1, E);
}
}
}
return BuiltinOp == Builtin::BI__atomic_always_lock_free ?
Success(0, E) : Error(E);
}
}
}
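// Illustrative sketch (editor's comment, not part of the original source):
// a few of the builtins handled above, used in constant expressions:
//
//   static_assert(__builtin_strlen("hello") == 5, "");
//   static_assert(__builtin_popcount(0xFF) == 8, "");
//   static_assert(__builtin_clz(1u) == 8 * sizeof(unsigned) - 1, "");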
static bool HasSameBase(const LValue &A, const LValue &B) {
if (!A.getLValueBase())
return !B.getLValueBase();
if (!B.getLValueBase())
return false;
if (A.getLValueBase().getOpaqueValue() !=
B.getLValueBase().getOpaqueValue()) {
const Decl *ADecl = GetLValueBaseDecl(A);
if (!ADecl)
return false;
const Decl *BDecl = GetLValueBaseDecl(B);
if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl())
return false;
}
return IsGlobalLValue(A.getLValueBase()) ||
A.getLValueCallIndex() == B.getLValueCallIndex();
}
namespace {
/// \brief Data recursive integer evaluator of certain binary operators.
///
/// We use a data recursive algorithm for binary operators so that we are able
/// to handle extreme cases of chained binary operators without causing stack
/// overflow.
class DataRecursiveIntBinOpEvaluator {
struct EvalResult {
APValue Val;
bool Failed;
EvalResult() : Failed(false) { }
void swap(EvalResult &RHS) {
Val.swap(RHS.Val);
Failed = RHS.Failed;
RHS.Failed = false;
}
};
struct Job {
const Expr *E;
EvalResult LHSResult; // meaningful only for binary operator expressions.
enum { AnyExprKind, BinOpKind, BinOpVisitedLHSKind } Kind;
Job() : StoredInfo(0) { }
void startSpeculativeEval(EvalInfo &Info) {
OldEvalStatus = Info.EvalStatus;
Info.EvalStatus.Diag = 0;
StoredInfo = &Info;
}
~Job() {
if (StoredInfo) {
StoredInfo->EvalStatus = OldEvalStatus;
}
}
private:
EvalInfo *StoredInfo; // non-null if status changed.
Expr::EvalStatus OldEvalStatus;
};
SmallVector<Job, 16> Queue;
IntExprEvaluator &IntEval;
EvalInfo &Info;
APValue &FinalResult;
public:
DataRecursiveIntBinOpEvaluator(IntExprEvaluator &IntEval, APValue &Result)
: IntEval(IntEval), Info(IntEval.getEvalInfo()), FinalResult(Result) { }
/// \brief True if \param E is a binary operator that we are going to handle
/// data recursively.
/// We handle binary operators that are comma, logical, or that have operands
/// with integral or enumeration type.
static bool shouldEnqueue(const BinaryOperator *E) {
return E->getOpcode() == BO_Comma ||
E->isLogicalOp() ||
(E->getLHS()->getType()->isIntegralOrEnumerationType() &&
E->getRHS()->getType()->isIntegralOrEnumerationType());
}
bool Traverse(const BinaryOperator *E) {
enqueue(E);
EvalResult PrevResult;
while (!Queue.empty())
process(PrevResult);
if (PrevResult.Failed) return false;
FinalResult.swap(PrevResult.Val);
return true;
}
private:
bool Success(uint64_t Value, const Expr *E, APValue &Result) {
return IntEval.Success(Value, E, Result);
}
bool Success(const APSInt &Value, const Expr *E, APValue &Result) {
return IntEval.Success(Value, E, Result);
}
bool Error(const Expr *E) {
return IntEval.Error(E);
}
bool Error(const Expr *E, diag::kind D) {
return IntEval.Error(E, D);
}
OptionalDiagnostic CCEDiag(const Expr *E, diag::kind D) {
return Info.CCEDiag(E, D);
}
/// \brief Returns true if visiting the RHS is necessary, false otherwise.
bool VisitBinOpLHSOnly(EvalResult &LHSResult, const BinaryOperator *E,
bool &SuppressRHSDiags);
bool VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult,
const BinaryOperator *E, APValue &Result);
void EvaluateExpr(const Expr *E, EvalResult &Result) {
Result.Failed = !Evaluate(Result.Val, Info, E);
if (Result.Failed)
Result.Val = APValue();
}
void process(EvalResult &Result);
void enqueue(const Expr *E) {
E = E->IgnoreParens();
Queue.resize(Queue.size()+1);
Queue.back().E = E;
Queue.back().Kind = Job::AnyExprKind;
}
};
}
bool DataRecursiveIntBinOpEvaluator::
VisitBinOpLHSOnly(EvalResult &LHSResult, const BinaryOperator *E,
bool &SuppressRHSDiags) {
if (E->getOpcode() == BO_Comma) {
// Ignore LHS but note if we could not evaluate it.
if (LHSResult.Failed)
return Info.noteSideEffect();
return true;
}
if (E->isLogicalOp()) {
bool LHSAsBool;
if (!LHSResult.Failed && HandleConversionToBool(LHSResult.Val, LHSAsBool)) {
// We were able to evaluate the LHS, see if we can get away with not
// evaluating the RHS: 0 && X -> 0, 1 || X -> 1
if (LHSAsBool == (E->getOpcode() == BO_LOr)) {
Success(LHSAsBool, E, LHSResult.Val);
return false; // Ignore RHS
}
} else {
LHSResult.Failed = true;
// Since we weren't able to evaluate the left hand side, it
// must have had side effects.
if (!Info.noteSideEffect())
return false;
// We can't evaluate the LHS; however, sometimes the result
// is determined by the RHS: X && 0 -> 0, X || 1 -> 1.
// Keep visiting the RHS, but suppress diagnostics from this arm.
SuppressRHSDiags = true;
}
return true;
}
assert(E->getLHS()->getType()->isIntegralOrEnumerationType() &&
E->getRHS()->getType()->isIntegralOrEnumerationType());
if (LHSResult.Failed && !Info.keepEvaluatingAfterFailure())
return false; // Ignore RHS;
return true;
}
bool DataRecursiveIntBinOpEvaluator::
VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult,
const BinaryOperator *E, APValue &Result) {
if (E->getOpcode() == BO_Comma) {
if (RHSResult.Failed)
return false;
Result = RHSResult.Val;
return true;
}
if (E->isLogicalOp()) {
bool lhsResult, rhsResult;
bool LHSIsOK = HandleConversionToBool(LHSResult.Val, lhsResult);
bool RHSIsOK = HandleConversionToBool(RHSResult.Val, rhsResult);
if (LHSIsOK) {
if (RHSIsOK) {
if (E->getOpcode() == BO_LOr)
return Success(lhsResult || rhsResult, E, Result);
else
return Success(lhsResult && rhsResult, E, Result);
}
} else {
if (RHSIsOK) {
// We can't evaluate the LHS; however, sometimes the result
// is determined by the RHS: X && 0 -> 0, X || 1 -> 1.
if (rhsResult == (E->getOpcode() == BO_LOr))
return Success(rhsResult, E, Result);
}
}
return false;
}
assert(E->getLHS()->getType()->isIntegralOrEnumerationType() &&
E->getRHS()->getType()->isIntegralOrEnumerationType());
if (LHSResult.Failed || RHSResult.Failed)
return false;
const APValue &LHSVal = LHSResult.Val;
const APValue &RHSVal = RHSResult.Val;
// Handle cases like (unsigned long)&a + 4.
if (E->isAdditiveOp() && LHSVal.isLValue() && RHSVal.isInt()) {
Result = LHSVal;
CharUnits AdditionalOffset =
CharUnits::fromQuantity(RHSVal.getInt().getZExtValue());
if (E->getOpcode() == BO_Add)
Result.getLValueOffset() += AdditionalOffset;
else
Result.getLValueOffset() -= AdditionalOffset;
return true;
}
// Handle cases like 4 + (unsigned long)&a
if (E->getOpcode() == BO_Add &&
RHSVal.isLValue() && LHSVal.isInt()) {
Result = RHSVal;
Result.getLValueOffset() +=
CharUnits::fromQuantity(LHSVal.getInt().getZExtValue());
return true;
}
if (E->getOpcode() == BO_Sub && LHSVal.isLValue() && RHSVal.isLValue()) {
// Handle (intptr_t)&&A - (intptr_t)&&B.
if (!LHSVal.getLValueOffset().isZero() ||
!RHSVal.getLValueOffset().isZero())
return false;
const Expr *LHSExpr = LHSVal.getLValueBase().dyn_cast<const Expr*>();
const Expr *RHSExpr = RHSVal.getLValueBase().dyn_cast<const Expr*>();
if (!LHSExpr || !RHSExpr)
return false;
const AddrLabelExpr *LHSAddrExpr = dyn_cast<AddrLabelExpr>(LHSExpr);
const AddrLabelExpr *RHSAddrExpr = dyn_cast<AddrLabelExpr>(RHSExpr);
if (!LHSAddrExpr || !RHSAddrExpr)
return false;
// Make sure both labels come from the same function.
if (LHSAddrExpr->getLabel()->getDeclContext() !=
RHSAddrExpr->getLabel()->getDeclContext())
return false;
Result = APValue(LHSAddrExpr, RHSAddrExpr);
return true;
}
// All the remaining cases expect both operands to be integers.
if (!LHSVal.isInt() || !RHSVal.isInt())
return Error(E);
// Set up the width and signedness manually, in case it can't be deduced
// from the operation we're performing.
// FIXME: Don't do this in the cases where we can deduce it.
APSInt Value(Info.Ctx.getIntWidth(E->getType()),
E->getType()->isUnsignedIntegerOrEnumerationType());
if (!handleIntIntBinOp(Info, E, LHSVal.getInt(), E->getOpcode(),
RHSVal.getInt(), Value))
return false;
return Success(Value, E, Result);
}
void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) {
Job &job = Queue.back();
switch (job.Kind) {
case Job::AnyExprKind: {
if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(job.E)) {
if (shouldEnqueue(Bop)) {
job.Kind = Job::BinOpKind;
enqueue(Bop->getLHS());
return;
}
}
EvaluateExpr(job.E, Result);
Queue.pop_back();
return;
}
case Job::BinOpKind: {
const BinaryOperator *Bop = cast<BinaryOperator>(job.E);
bool SuppressRHSDiags = false;
if (!VisitBinOpLHSOnly(Result, Bop, SuppressRHSDiags)) {
Queue.pop_back();
return;
}
if (SuppressRHSDiags)
job.startSpeculativeEval(Info);
job.LHSResult.swap(Result);
job.Kind = Job::BinOpVisitedLHSKind;
enqueue(Bop->getRHS());
return;
}
case Job::BinOpVisitedLHSKind: {
const BinaryOperator *Bop = cast<BinaryOperator>(job.E);
EvalResult RHS;
RHS.swap(Result);
Result.Failed = !VisitBinOp(job.LHSResult, RHS, Bop, Result.Val);
Queue.pop_back();
return;
}
}
llvm_unreachable("Invalid Job::Kind!");
}
bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
if (E->isAssignmentOp())
return Error(E);
if (DataRecursiveIntBinOpEvaluator::shouldEnqueue(E))
return DataRecursiveIntBinOpEvaluator(*this, Result).Traverse(E);
QualType LHSTy = E->getLHS()->getType();
QualType RHSTy = E->getRHS()->getType();
if (LHSTy->isAnyComplexType()) {
assert(RHSTy->isAnyComplexType() && "Invalid comparison");
ComplexValue LHS, RHS;
bool LHSOK = EvaluateComplex(E->getLHS(), LHS, Info);
if (!LHSOK && !Info.keepEvaluatingAfterFailure())
return false;
if (!EvaluateComplex(E->getRHS(), RHS, Info) || !LHSOK)
return false;
if (LHS.isComplexFloat()) {
APFloat::cmpResult CR_r =
LHS.getComplexFloatReal().compare(RHS.getComplexFloatReal());
APFloat::cmpResult CR_i =
LHS.getComplexFloatImag().compare(RHS.getComplexFloatImag());
if (E->getOpcode() == BO_EQ)
return Success((CR_r == APFloat::cmpEqual &&
CR_i == APFloat::cmpEqual), E);
else {
assert(E->getOpcode() == BO_NE &&
"Invalid complex comparison.");
return Success(((CR_r == APFloat::cmpGreaterThan ||
CR_r == APFloat::cmpLessThan ||
CR_r == APFloat::cmpUnordered) ||
(CR_i == APFloat::cmpGreaterThan ||
CR_i == APFloat::cmpLessThan ||
CR_i == APFloat::cmpUnordered)), E);
}
} else {
if (E->getOpcode() == BO_EQ)
return Success((LHS.getComplexIntReal() == RHS.getComplexIntReal() &&
LHS.getComplexIntImag() == RHS.getComplexIntImag()), E);
else {
assert(E->getOpcode() == BO_NE &&
"Invalid compex comparison.");
return Success((LHS.getComplexIntReal() != RHS.getComplexIntReal() ||
LHS.getComplexIntImag() != RHS.getComplexIntImag()), E);
}
}
}
if (LHSTy->isRealFloatingType() &&
RHSTy->isRealFloatingType()) {
APFloat RHS(0.0), LHS(0.0);
bool LHSOK = EvaluateFloat(E->getRHS(), RHS, Info);
if (!LHSOK && !Info.keepEvaluatingAfterFailure())
return false;
if (!EvaluateFloat(E->getLHS(), LHS, Info) || !LHSOK)
return false;
APFloat::cmpResult CR = LHS.compare(RHS);
switch (E->getOpcode()) {
default:
llvm_unreachable("Invalid binary operator!");
case BO_LT:
return Success(CR == APFloat::cmpLessThan, E);
case BO_GT:
return Success(CR == APFloat::cmpGreaterThan, E);
case BO_LE:
return Success(CR == APFloat::cmpLessThan || CR == APFloat::cmpEqual, E);
case BO_GE:
return Success(CR == APFloat::cmpGreaterThan || CR == APFloat::cmpEqual,
E);
case BO_EQ:
return Success(CR == APFloat::cmpEqual, E);
case BO_NE:
return Success(CR == APFloat::cmpGreaterThan
|| CR == APFloat::cmpLessThan
|| CR == APFloat::cmpUnordered, E);
}
}
if (LHSTy->isPointerType() && RHSTy->isPointerType()) {
if (E->getOpcode() == BO_Sub || E->isComparisonOp()) {
LValue LHSValue, RHSValue;
bool LHSOK = EvaluatePointer(E->getLHS(), LHSValue, Info);
if (!LHSOK && !Info.keepEvaluatingAfterFailure())
return false;
if (!EvaluatePointer(E->getRHS(), RHSValue, Info) || !LHSOK)
return false;
// Reject differing bases from the normal codepath; we special-case
// comparisons to null.
if (!HasSameBase(LHSValue, RHSValue)) {
if (E->getOpcode() == BO_Sub) {
// Handle &&A - &&B.
if (!LHSValue.Offset.isZero() || !RHSValue.Offset.isZero())
return false;
const Expr *LHSExpr = LHSValue.Base.dyn_cast<const Expr*>();
const Expr *RHSExpr = RHSValue.Base.dyn_cast<const Expr*>();
if (!LHSExpr || !RHSExpr)
return false;
const AddrLabelExpr *LHSAddrExpr = dyn_cast<AddrLabelExpr>(LHSExpr);
const AddrLabelExpr *RHSAddrExpr = dyn_cast<AddrLabelExpr>(RHSExpr);
if (!LHSAddrExpr || !RHSAddrExpr)
return false;
// Make sure both labels come from the same function.
if (LHSAddrExpr->getLabel()->getDeclContext() !=
RHSAddrExpr->getLabel()->getDeclContext())
return false;
Result = APValue(LHSAddrExpr, RHSAddrExpr);
return true;
}
// Inequalities and subtractions between unrelated pointers have
// unspecified or undefined behavior.
if (!E->isEqualityOp())
return Error(E);
// A constant address may compare equal to the address of a symbol.
// The one exception is that the address of an object cannot compare equal
// to a null pointer constant.
if ((!LHSValue.Base && !LHSValue.Offset.isZero()) ||
(!RHSValue.Base && !RHSValue.Offset.isZero()))
return Error(E);
// It's implementation-defined whether distinct literals will have
// distinct addresses. In clang, the result of such a comparison is
// unspecified, so it is not a constant expression. However, we do know
// that the address of a literal will be non-null.
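// For example, whether "foo" == "foo" holds depends on whether the two
// literals were merged, so such a comparison is not a constant expression.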
if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) &&
LHSValue.Base && RHSValue.Base)
return Error(E);
// We can't tell whether weak symbols will end up pointing to the same
// object.
if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue))
return Error(E);
// Pointers with different bases cannot represent the same object.
// (Note that clang defaults to -fmerge-all-constants, which can
// lead to inconsistent results for comparisons involving the address
// of a constant; this generally doesn't matter in practice.)
return Success(E->getOpcode() == BO_NE, E);
}
const CharUnits &LHSOffset = LHSValue.getLValueOffset();
const CharUnits &RHSOffset = RHSValue.getLValueOffset();
SubobjectDesignator &LHSDesignator = LHSValue.getLValueDesignator();
SubobjectDesignator &RHSDesignator = RHSValue.getLValueDesignator();
if (E->getOpcode() == BO_Sub) {
// C++11 [expr.add]p6:
// Unless both pointers point to elements of the same array object, or
// one past the last element of the array object, the behavior is
// undefined.
if (!LHSDesignator.Invalid && !RHSDesignator.Invalid &&
!AreElementsOfSameArray(getType(LHSValue.Base),
LHSDesignator, RHSDesignator))
CCEDiag(E, diag::note_constexpr_pointer_subtraction_not_same_array);
QualType Type = E->getLHS()->getType();
QualType ElementType = Type->getAs<PointerType>()->getPointeeType();
CharUnits ElementSize;
if (!HandleSizeof(Info, E->getExprLoc(), ElementType, ElementSize))
return false;
// As an extension, a type may have zero size (empty struct or union in
// C, array of zero length). Pointer subtraction in such cases has
// undefined behavior, so is not constant.
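// For example, given the GNU C declaration "struct {} *p;", evaluating
// "p - p" would divide by the zero element size, so it is rejected here.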
if (ElementSize.isZero()) {
Info.Diag(E, diag::note_constexpr_pointer_subtraction_zero_size)
<< ElementType;
return false;
}
// FIXME: LLVM and GCC both compute LHSOffset - RHSOffset at runtime,
// and produce incorrect results when it overflows. Such behavior
// appears to be non-conforming, but is common, so perhaps we should
// assume the standard intended for such cases to be undefined behavior
// and check for them.
// Compute (LHSOffset - RHSOffset) / Size carefully, checking for
// overflow in the final conversion to ptrdiff_t.
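// (65 bits suffice: each offset fits in a signed 64-bit value, so the
// difference of any two such values fits in 65 bits.)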
APSInt LHS(
llvm::APInt(65, (int64_t)LHSOffset.getQuantity(), true), false);
APSInt RHS(
llvm::APInt(65, (int64_t)RHSOffset.getQuantity(), true), false);
APSInt ElemSize(
llvm::APInt(65, (int64_t)ElementSize.getQuantity(), true), false);
APSInt TrueResult = (LHS - RHS) / ElemSize;
APSInt Result = TrueResult.trunc(Info.Ctx.getIntWidth(E->getType()));
if (Result.extend(65) != TrueResult)
HandleOverflow(Info, E, TrueResult, E->getType());
return Success(Result, E);
}
// C++11 [expr.rel]p3:
// Pointers to void (after pointer conversions) can be compared, with a
// result defined as follows: If both pointers represent the same
// address or are both the null pointer value, the result is true if the
// operator is <= or >= and false otherwise; otherwise the result is
// unspecified.
// We interpret this as applying to pointers to *cv* void.
if (LHSTy->isVoidPointerType() && LHSOffset != RHSOffset &&
E->isRelationalOp())
CCEDiag(E, diag::note_constexpr_void_comparison);
// C++11 [expr.rel]p2:
// - If two pointers point to non-static data members of the same object,
// or to subobjects or array elements of such members, recursively, the
// pointer to the later declared member compares greater provided the
// two members have the same access control and provided their class is
// not a union.
// [...]
// - Otherwise pointer comparisons are unspecified.
if (!LHSDesignator.Invalid && !RHSDesignator.Invalid &&
E->isRelationalOp()) {
bool WasArrayIndex;
unsigned Mismatch =
FindDesignatorMismatch(getType(LHSValue.Base), LHSDesignator,
RHSDesignator, WasArrayIndex);
// At the point where the designators diverge, the comparison has a
// specified value if:
// - we are comparing array indices
// - we are comparing fields of a union, or fields with the same access
// Otherwise, the result is unspecified and thus the comparison is not a
// constant expression.
if (!WasArrayIndex && Mismatch < LHSDesignator.Entries.size() &&
Mismatch < RHSDesignator.Entries.size()) {
const FieldDecl *LF = getAsField(LHSDesignator.Entries[Mismatch]);
const FieldDecl *RF = getAsField(RHSDesignator.Entries[Mismatch]);
if (!LF && !RF)
CCEDiag(E, diag::note_constexpr_pointer_comparison_base_classes);
else if (!LF)
CCEDiag(E, diag::note_constexpr_pointer_comparison_base_field)
<< getAsBaseClass(LHSDesignator.Entries[Mismatch])
<< RF->getParent() << RF;
else if (!RF)
CCEDiag(E, diag::note_constexpr_pointer_comparison_base_field)
<< getAsBaseClass(RHSDesignator.Entries[Mismatch])
<< LF->getParent() << LF;
else if (!LF->getParent()->isUnion() &&
LF->getAccess() != RF->getAccess())
CCEDiag(E, diag::note_constexpr_pointer_comparison_differing_access)
<< LF << LF->getAccess() << RF << RF->getAccess()
<< LF->getParent();
}
}
// The comparison here must be unsigned, and performed with the same
// width as the pointer.
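// (On a 32-bit target, for example, the mask below is 0xFFFFFFFF, so the
// offsets are compared modulo 2^32.)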
unsigned PtrSize = Info.Ctx.getTypeSize(LHSTy);
uint64_t CompareLHS = LHSOffset.getQuantity();
uint64_t CompareRHS = RHSOffset.getQuantity();
assert(PtrSize <= 64 && "Unexpected pointer width");
uint64_t Mask = ~0ULL >> (64 - PtrSize);
CompareLHS &= Mask;
CompareRHS &= Mask;
// If there is a base and this is a relational operator, we can only
// compare pointers within the object in question; otherwise, the result
// depends on where the object is located in memory.
if (!LHSValue.Base.isNull() && E->isRelationalOp()) {
QualType BaseTy = getType(LHSValue.Base);
if (BaseTy->isIncompleteType())
return Error(E);
CharUnits Size = Info.Ctx.getTypeSizeInChars(BaseTy);
uint64_t OffsetLimit = Size.getQuantity();
if (CompareLHS > OffsetLimit || CompareRHS > OffsetLimit)
return Error(E);
}
switch (E->getOpcode()) {
default: llvm_unreachable("missing comparison operator");
case BO_LT: return Success(CompareLHS < CompareRHS, E);
case BO_GT: return Success(CompareLHS > CompareRHS, E);
case BO_LE: return Success(CompareLHS <= CompareRHS, E);
case BO_GE: return Success(CompareLHS >= CompareRHS, E);
case BO_EQ: return Success(CompareLHS == CompareRHS, E);
case BO_NE: return Success(CompareLHS != CompareRHS, E);
}
}
}
if (LHSTy->isMemberPointerType()) {
assert(E->isEqualityOp() && "unexpected member pointer operation");
assert(RHSTy->isMemberPointerType() && "invalid comparison");
MemberPtr LHSValue, RHSValue;
bool LHSOK = EvaluateMemberPointer(E->getLHS(), LHSValue, Info);
if (!LHSOK && !Info.keepEvaluatingAfterFailure())
return false;
if (!EvaluateMemberPointer(E->getRHS(), RHSValue, Info) || !LHSOK)
return false;
// C++11 [expr.eq]p2:
// If both operands are null, they compare equal. Otherwise if only one is
// null, they compare unequal.
if (!LHSValue.getDecl() || !RHSValue.getDecl()) {
bool Equal = !LHSValue.getDecl() && !RHSValue.getDecl();
return Success(E->getOpcode() == BO_EQ ? Equal : !Equal, E);
}
// Otherwise if either is a pointer to a virtual member function, the
// result is unspecified.
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(LHSValue.getDecl()))
if (MD->isVirtual())
CCEDiag(E, diag::note_constexpr_compare_virtual_mem_ptr) << MD;
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(RHSValue.getDecl()))
if (MD->isVirtual())
CCEDiag(E, diag::note_constexpr_compare_virtual_mem_ptr) << MD;
// Otherwise they compare equal if and only if they would refer to the
// same member of the same most derived object or the same subobject if
// they were dereferenced with a hypothetical object of the associated
// class type.
bool Equal = LHSValue == RHSValue;
return Success(E->getOpcode() == BO_EQ ? Equal : !Equal, E);
}
if (LHSTy->isNullPtrType()) {
assert(E->isComparisonOp() && "unexpected nullptr operation");
assert(RHSTy->isNullPtrType() && "missing pointer conversion");
// C++11 [expr.rel]p4, [expr.eq]p3: If two operands of type std::nullptr_t
// are compared, the result is true if the operator is <=, >= or ==, and
// false otherwise.
BinaryOperator::Opcode Opcode = E->getOpcode();
return Success(Opcode == BO_EQ || Opcode == BO_LE || Opcode == BO_GE, E);
}
assert((!LHSTy->isIntegralOrEnumerationType() ||
!RHSTy->isIntegralOrEnumerationType()) &&
"DataRecursiveIntBinOpEvaluator should have handled integral types");
// We can't continue from here for non-integral types.
return ExprEvaluatorBaseTy::VisitBinaryOperator(E);
}
CharUnits IntExprEvaluator::GetAlignOfType(QualType T) {
// C++ [expr.alignof]p3: "When alignof is applied to a reference type, the
// result shall be the alignment of the referenced type."
if (const ReferenceType *Ref = T->getAs<ReferenceType>())
T = Ref->getPointeeType();
// __alignof is defined to return the preferred alignment.
return Info.Ctx.toCharUnitsFromBits(
Info.Ctx.getPreferredTypeAlign(T.getTypePtr()));
}
CharUnits IntExprEvaluator::GetAlignOfExpr(const Expr *E) {
E = E->IgnoreParens();
// The kinds of expressions that we have special-case logic here for
// should be kept up to date with the special checks for those
// expressions in Sema.
// alignof decl is always accepted, even if it doesn't make sense: we default
// to 1 in those cases.
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E))
return Info.Ctx.getDeclAlign(DRE->getDecl(),
/*RefAsPointee*/true);
if (const MemberExpr *ME = dyn_cast<MemberExpr>(E))
return Info.Ctx.getDeclAlign(ME->getMemberDecl(),
/*RefAsPointee*/true);
return GetAlignOfType(E->getType());
}
/// VisitUnaryExprOrTypeTraitExpr - Evaluate a sizeof, alignof or vec_step
/// expression, producing a result of the expression's type.
bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
const UnaryExprOrTypeTraitExpr *E) {
switch(E->getKind()) {
case UETT_AlignOf: {
if (E->isArgumentType())
return Success(GetAlignOfType(E->getArgumentType()), E);
else
return Success(GetAlignOfExpr(E->getArgumentExpr()), E);
}
case UETT_VecStep: {
QualType Ty = E->getTypeOfArgument();
if (Ty->isVectorType()) {
unsigned n = Ty->castAs<VectorType>()->getNumElements();
// The vec_step built-in functions that take a 3-component
// vector return 4. (OpenCL 1.1 spec 6.11.12)
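// For example, vec_step(float3) and vec_step(float4) both evaluate to 4,
// while vec_step of a scalar type is 1.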
if (n == 3)
n = 4;
return Success(n, E);
} else
return Success(1, E);
}
case UETT_SizeOf: {
QualType SrcTy = E->getTypeOfArgument();
// C++ [expr.sizeof]p2: "When applied to a reference or a reference type,
// the result is the size of the referenced type."
if (const ReferenceType *Ref = SrcTy->getAs<ReferenceType>())
SrcTy = Ref->getPointeeType();
CharUnits Sizeof;
if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof))
return false;
return Success(Sizeof, E);
}
}
llvm_unreachable("unknown expr/type trait");
}
bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) {
CharUnits Result;
unsigned n = OOE->getNumComponents();
if (n == 0)
return Error(OOE);
QualType CurrentType = OOE->getTypeSourceInfo()->getType();
for (unsigned i = 0; i != n; ++i) {
OffsetOfExpr::OffsetOfNode ON = OOE->getComponent(i);
switch (ON.getKind()) {
case OffsetOfExpr::OffsetOfNode::Array: {
const Expr *Idx = OOE->getIndexExpr(ON.getArrayExprIndex());
APSInt IdxResult;
if (!EvaluateInteger(Idx, IdxResult, Info))
return false;
const ArrayType *AT = Info.Ctx.getAsArrayType(CurrentType);
if (!AT)
return Error(OOE);
CurrentType = AT->getElementType();
CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
Result += IdxResult.getSExtValue() * ElementSize;
break;
}
case OffsetOfExpr::OffsetOfNode::Field: {
FieldDecl *MemberDecl = ON.getField();
const RecordType *RT = CurrentType->getAs<RecordType>();
if (!RT)
return Error(OOE);
RecordDecl *RD = RT->getDecl();
if (RD->isInvalidDecl()) return false;
const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD);
unsigned i = MemberDecl->getFieldIndex();
assert(i < RL.getFieldCount() && "offsetof field in wrong type");
Result += Info.Ctx.toCharUnitsFromBits(RL.getFieldOffset(i));
CurrentType = MemberDecl->getType().getNonReferenceType();
break;
}
case OffsetOfExpr::OffsetOfNode::Identifier:
llvm_unreachable("dependent __builtin_offsetof");
case OffsetOfExpr::OffsetOfNode::Base: {
CXXBaseSpecifier *BaseSpec = ON.getBase();
if (BaseSpec->isVirtual())
return Error(OOE);
// Find the layout of the class whose base we are looking into.
const RecordType *RT = CurrentType->getAs<RecordType>();
if (!RT)
return Error(OOE);
RecordDecl *RD = RT->getDecl();
if (RD->isInvalidDecl()) return false;
const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD);
// Find the base class itself.
CurrentType = BaseSpec->getType();
const RecordType *BaseRT = CurrentType->getAs<RecordType>();
if (!BaseRT)
return Error(OOE);
// Add the offset to the base.
Result += RL.getBaseClassOffset(cast<CXXRecordDecl>(BaseRT->getDecl()));
break;
}
}
}
return Success(Result, OOE);
}
bool IntExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) {
switch (E->getOpcode()) {
default:
// Address, indirect, pre/post inc/dec, etc are not valid constant exprs.
// See C99 6.6p3.
return Error(E);
case UO_Extension:
// FIXME: Should extension allow i-c-e extension expressions in its scope?
// If so, we could clear the diagnostic ID.
return Visit(E->getSubExpr());
case UO_Plus:
// The result is just the value.
return Visit(E->getSubExpr());
case UO_Minus: {
if (!Visit(E->getSubExpr()))
return false;
if (!Result.isInt()) return Error(E);
const APSInt &Value = Result.getInt();
if (Value.isSigned() && Value.isMinSignedValue())
HandleOverflow(Info, E, -Value.extend(Value.getBitWidth() + 1),
E->getType());
return Success(-Value, E);
}
case UO_Not: {
if (!Visit(E->getSubExpr()))
return false;
if (!Result.isInt()) return Error(E);
return Success(~Result.getInt(), E);
}
case UO_LNot: {
bool bres;
if (!EvaluateAsBooleanCondition(E->getSubExpr(), bres, Info))
return false;
return Success(!bres, E);
}
}
}
/// VisitCastExpr - Evaluate implicit or explicit casts where the result
/// type is integral.
bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
const Expr *SubExpr = E->getSubExpr();
QualType DestType = E->getType();
QualType SrcType = SubExpr->getType();
switch (E->getCastKind()) {
case CK_BaseToDerived:
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase:
case CK_Dynamic:
case CK_ToUnion:
case CK_ArrayToPointerDecay:
case CK_FunctionToPointerDecay:
case CK_NullToPointer:
case CK_NullToMemberPointer:
case CK_BaseToDerivedMemberPointer:
case CK_DerivedToBaseMemberPointer:
case CK_ReinterpretMemberPointer:
case CK_ConstructorConversion:
case CK_IntegralToPointer:
case CK_ToVoid:
case CK_VectorSplat:
case CK_IntegralToFloating:
case CK_FloatingCast:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_AnyPointerToBlockPointerCast:
case CK_ObjCObjectLValueCast:
case CK_FloatingRealToComplex:
case CK_FloatingComplexToReal:
case CK_FloatingComplexCast:
case CK_FloatingComplexToIntegralComplex:
case CK_IntegralRealToComplex:
case CK_IntegralComplexCast:
case CK_IntegralComplexToFloatingComplex:
case CK_BuiltinFnToFnPtr:
case CK_ZeroToOCLEvent:
case CK_NonAtomicToAtomic:
llvm_unreachable("invalid cast kind for integral value");
case CK_BitCast:
case CK_Dependent:
case CK_LValueBitCast:
case CK_ARCProduceObject:
case CK_ARCConsumeObject:
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
return Error(E);
case CK_UserDefinedConversion:
case CK_LValueToRValue:
case CK_AtomicToNonAtomic:
case CK_NoOp:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_MemberPointerToBoolean:
case CK_PointerToBoolean:
case CK_IntegralToBoolean:
case CK_FloatingToBoolean:
case CK_FloatingComplexToBoolean:
case CK_IntegralComplexToBoolean: {
bool BoolResult;
if (!EvaluateAsBooleanCondition(SubExpr, BoolResult, Info))
return false;
return Success(BoolResult, E);
}
case CK_IntegralCast: {
if (!Visit(SubExpr))
return false;
if (!Result.isInt()) {
// Allow casts of address-of-label differences if they are no-ops
// or narrowing. (The narrowing case isn't actually guaranteed to
// be constant-evaluatable except in some narrow cases which are hard
// to detect here. We let it through on the assumption the user knows
// what they are doing.)
if (Result.isAddrLabelDiff())
return Info.Ctx.getTypeSize(DestType) <= Info.Ctx.getTypeSize(SrcType);
// Only allow casts of lvalues if they are lossless.
return Info.Ctx.getTypeSize(DestType) == Info.Ctx.getTypeSize(SrcType);
}
return Success(HandleIntToIntCast(Info, E, DestType, SrcType,
Result.getInt()), E);
}
case CK_PointerToIntegral: {
CCEDiag(E, diag::note_constexpr_invalid_cast) << 2;
LValue LV;
if (!EvaluatePointer(SubExpr, LV, Info))
return false;
if (LV.getLValueBase()) {
// Only allow based lvalue casts if they are lossless.
// FIXME: Allow a larger integer size than the pointer size, and allow
// narrowing back down to pointer width in subsequent integral casts.
// FIXME: Check integer type's active bits, not its type size.
if (Info.Ctx.getTypeSize(DestType) != Info.Ctx.getTypeSize(SrcType))
return Error(E);
LV.Designator.setInvalid();
LV.moveInto(Result);
return true;
}
APSInt AsInt = Info.Ctx.MakeIntValue(LV.getLValueOffset().getQuantity(),
SrcType);
return Success(HandleIntToIntCast(Info, E, DestType, SrcType, AsInt), E);
}
case CK_IntegralComplexToReal: {
ComplexValue C;
if (!EvaluateComplex(SubExpr, C, Info))
return false;
return Success(C.getComplexIntReal(), E);
}
case CK_FloatingToIntegral: {
APFloat F(0.0);
if (!EvaluateFloat(SubExpr, F, Info))
return false;
APSInt Value;
if (!HandleFloatToIntCast(Info, E, SrcType, F, DestType, Value))
return false;
return Success(Value, E);
}
}
llvm_unreachable("unknown cast resulting in integral value");
}
bool IntExprEvaluator::VisitUnaryReal(const UnaryOperator *E) {
if (E->getSubExpr()->getType()->isAnyComplexType()) {
ComplexValue LV;
if (!EvaluateComplex(E->getSubExpr(), LV, Info))
return false;
if (!LV.isComplexInt())
return Error(E);
return Success(LV.getComplexIntReal(), E);
}
return Visit(E->getSubExpr());
}
bool IntExprEvaluator::VisitUnaryImag(const UnaryOperator *E) {
if (E->getSubExpr()->getType()->isComplexIntegerType()) {
ComplexValue LV;
if (!EvaluateComplex(E->getSubExpr(), LV, Info))
return false;
if (!LV.isComplexInt())
return Error(E);
return Success(LV.getComplexIntImag(), E);
}
VisitIgnoredValue(E->getSubExpr());
return Success(0, E);
}
bool IntExprEvaluator::VisitSizeOfPackExpr(const SizeOfPackExpr *E) {
return Success(E->getPackLength(), E);
}
bool IntExprEvaluator::VisitCXXNoexceptExpr(const CXXNoexceptExpr *E) {
return Success(E->getValue(), E);
}
//===----------------------------------------------------------------------===//
// Float Evaluation
//===----------------------------------------------------------------------===//
namespace {
class FloatExprEvaluator
: public ExprEvaluatorBase<FloatExprEvaluator, bool> {
APFloat &Result;
public:
FloatExprEvaluator(EvalInfo &info, APFloat &result)
: ExprEvaluatorBaseTy(info), Result(result) {}
bool Success(const APValue &V, const Expr *e) {
Result = V.getFloat();
return true;
}
bool ZeroInitialization(const Expr *E) {
Result = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(E->getType()));
return true;
}
bool VisitCallExpr(const CallExpr *E);
bool VisitUnaryOperator(const UnaryOperator *E);
bool VisitBinaryOperator(const BinaryOperator *E);
bool VisitFloatingLiteral(const FloatingLiteral *E);
bool VisitCastExpr(const CastExpr *E);
bool VisitUnaryReal(const UnaryOperator *E);
bool VisitUnaryImag(const UnaryOperator *E);
// FIXME: Missing: array subscript of vector, member of vector
};
} // end anonymous namespace
static bool EvaluateFloat(const Expr* E, APFloat& Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isRealFloatingType());
return FloatExprEvaluator(Info, Result).Visit(E);
}
static bool TryEvaluateBuiltinNaN(const ASTContext &Context,
QualType ResultTy,
const Expr *Arg,
bool SNaN,
llvm::APFloat &Result) {
const StringLiteral *S = dyn_cast<StringLiteral>(Arg->IgnoreParenCasts());
if (!S) return false;
const llvm::fltSemantics &Sem = Context.getFloatTypeSemantics(ResultTy);
llvm::APInt fill;
// Treat empty strings as if they were zero.
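// For example, __builtin_nan("") produces a NaN with a zero payload.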
if (S->getString().empty())
fill = llvm::APInt(32, 0);
else if (S->getString().getAsInteger(0, fill))
return false;
if (SNaN)
Result = llvm::APFloat::getSNaN(Sem, false, &fill);
else
Result = llvm::APFloat::getQNaN(Sem, false, &fill);
return true;
}
bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) {
switch (E->isBuiltinCall()) {
default:
return ExprEvaluatorBaseTy::VisitCallExpr(E);
case Builtin::BI__builtin_huge_val:
case Builtin::BI__builtin_huge_valf:
case Builtin::BI__builtin_huge_vall:
case Builtin::BI__builtin_inf:
case Builtin::BI__builtin_inff:
case Builtin::BI__builtin_infl: {
const llvm::fltSemantics &Sem =
Info.Ctx.getFloatTypeSemantics(E->getType());
Result = llvm::APFloat::getInf(Sem);
return true;
}
case Builtin::BI__builtin_nans:
case Builtin::BI__builtin_nansf:
case Builtin::BI__builtin_nansl:
if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0),
true, Result))
return Error(E);
return true;
case Builtin::BI__builtin_nan:
case Builtin::BI__builtin_nanf:
case Builtin::BI__builtin_nanl:
// If this is __builtin_nan(), turn this into a NaN; otherwise we
// can't constant fold it.
if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0),
false, Result))
return Error(E);
return true;
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl:
if (!EvaluateFloat(E->getArg(0), Result, Info))
return false;
if (Result.isNegative())
Result.changeSign();
return true;
// FIXME: Builtin::BI__builtin_powi
// FIXME: Builtin::BI__builtin_powif
// FIXME: Builtin::BI__builtin_powil
case Builtin::BI__builtin_copysign:
case Builtin::BI__builtin_copysignf:
case Builtin::BI__builtin_copysignl: {
APFloat RHS(0.);
if (!EvaluateFloat(E->getArg(0), Result, Info) ||
!EvaluateFloat(E->getArg(1), RHS, Info))
return false;
Result.copySign(RHS);
return true;
}
}
}
bool FloatExprEvaluator::VisitUnaryReal(const UnaryOperator *E) {
if (E->getSubExpr()->getType()->isAnyComplexType()) {
ComplexValue CV;
if (!EvaluateComplex(E->getSubExpr(), CV, Info))
return false;
Result = CV.FloatReal;
return true;
}
return Visit(E->getSubExpr());
}
bool FloatExprEvaluator::VisitUnaryImag(const UnaryOperator *E) {
if (E->getSubExpr()->getType()->isAnyComplexType()) {
ComplexValue CV;
if (!EvaluateComplex(E->getSubExpr(), CV, Info))
return false;
Result = CV.FloatImag;
return true;
}
VisitIgnoredValue(E->getSubExpr());
const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(E->getType());
Result = llvm::APFloat::getZero(Sem);
return true;
}
bool FloatExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) {
switch (E->getOpcode()) {
default: return Error(E);
case UO_Plus:
return EvaluateFloat(E->getSubExpr(), Result, Info);
case UO_Minus:
if (!EvaluateFloat(E->getSubExpr(), Result, Info))
return false;
Result.changeSign();
return true;
}
}
bool FloatExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma)
return ExprEvaluatorBaseTy::VisitBinaryOperator(E);
APFloat RHS(0.0);
bool LHSOK = EvaluateFloat(E->getLHS(), Result, Info);
if (!LHSOK && !Info.keepEvaluatingAfterFailure())
return false;
return EvaluateFloat(E->getRHS(), RHS, Info) && LHSOK &&
handleFloatFloatBinOp(Info, E, Result, E->getOpcode(), RHS);
}
bool FloatExprEvaluator::VisitFloatingLiteral(const FloatingLiteral *E) {
Result = E->getValue();
return true;
}
bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) {
const Expr* SubExpr = E->getSubExpr();
switch (E->getCastKind()) {
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_IntegralToFloating: {
APSInt IntResult;
return EvaluateInteger(SubExpr, IntResult, Info) &&
HandleIntToFloatCast(Info, E, SubExpr->getType(), IntResult,
E->getType(), Result);
}
case CK_FloatingCast: {
if (!Visit(SubExpr))
return false;
return HandleFloatToFloatCast(Info, E, SubExpr->getType(), E->getType(),
Result);
}
case CK_FloatingComplexToReal: {
ComplexValue V;
if (!EvaluateComplex(SubExpr, V, Info))
return false;
Result = V.getComplexFloatReal();
return true;
}
}
}
//===----------------------------------------------------------------------===//
// Complex Evaluation (for float and integer)
//===----------------------------------------------------------------------===//
namespace {
class ComplexExprEvaluator
: public ExprEvaluatorBase<ComplexExprEvaluator, bool> {
ComplexValue &Result;
public:
ComplexExprEvaluator(EvalInfo &info, ComplexValue &Result)
: ExprEvaluatorBaseTy(info), Result(Result) {}
bool Success(const APValue &V, const Expr *e) {
Result.setFrom(V);
return true;
}
bool ZeroInitialization(const Expr *E);
//===--------------------------------------------------------------------===//
// Visitor Methods
//===--------------------------------------------------------------------===//
bool VisitImaginaryLiteral(const ImaginaryLiteral *E);
bool VisitCastExpr(const CastExpr *E);
bool VisitBinaryOperator(const BinaryOperator *E);
bool VisitUnaryOperator(const UnaryOperator *E);
bool VisitInitListExpr(const InitListExpr *E);
};
} // end anonymous namespace
static bool EvaluateComplex(const Expr *E, ComplexValue &Result,
EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isAnyComplexType());
return ComplexExprEvaluator(Info, Result).Visit(E);
}
bool ComplexExprEvaluator::ZeroInitialization(const Expr *E) {
QualType ElemTy = E->getType()->castAs<ComplexType>()->getElementType();
if (ElemTy->isRealFloatingType()) {
Result.makeComplexFloat();
APFloat Zero = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy));
Result.FloatReal = Zero;
Result.FloatImag = Zero;
} else {
Result.makeComplexInt();
APSInt Zero = Info.Ctx.MakeIntValue(0, ElemTy);
Result.IntReal = Zero;
Result.IntImag = Zero;
}
return true;
}
bool ComplexExprEvaluator::VisitImaginaryLiteral(const ImaginaryLiteral *E) {
const Expr* SubExpr = E->getSubExpr();
if (SubExpr->getType()->isRealFloatingType()) {
Result.makeComplexFloat();
APFloat &Imag = Result.FloatImag;
if (!EvaluateFloat(SubExpr, Imag, Info))
return false;
Result.FloatReal = APFloat(Imag.getSemantics());
return true;
} else {
assert(SubExpr->getType()->isIntegerType() &&
"Unexpected imaginary literal.");
Result.makeComplexInt();
APSInt &Imag = Result.IntImag;
if (!EvaluateInteger(SubExpr, Imag, Info))
return false;
Result.IntReal = APSInt(Imag.getBitWidth(), !Imag.isSigned());
return true;
}
}
bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
case CK_BitCast:
case CK_BaseToDerived:
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase:
case CK_Dynamic:
case CK_ToUnion:
case CK_ArrayToPointerDecay:
case CK_FunctionToPointerDecay:
case CK_NullToPointer:
case CK_NullToMemberPointer:
case CK_BaseToDerivedMemberPointer:
case CK_DerivedToBaseMemberPointer:
case CK_MemberPointerToBoolean:
case CK_ReinterpretMemberPointer:
case CK_ConstructorConversion:
case CK_IntegralToPointer:
case CK_PointerToIntegral:
case CK_PointerToBoolean:
case CK_ToVoid:
case CK_VectorSplat:
case CK_IntegralCast:
case CK_IntegralToBoolean:
case CK_IntegralToFloating:
case CK_FloatingToIntegral:
case CK_FloatingToBoolean:
case CK_FloatingCast:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_AnyPointerToBlockPointerCast:
case CK_ObjCObjectLValueCast:
case CK_FloatingComplexToReal:
case CK_FloatingComplexToBoolean:
case CK_IntegralComplexToReal:
case CK_IntegralComplexToBoolean:
case CK_ARCProduceObject:
case CK_ARCConsumeObject:
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
case CK_ZeroToOCLEvent:
case CK_NonAtomicToAtomic:
llvm_unreachable("invalid cast kind for complex value");
case CK_LValueToRValue:
case CK_AtomicToNonAtomic:
case CK_NoOp:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_Dependent:
case CK_LValueBitCast:
case CK_UserDefinedConversion:
return Error(E);
case CK_FloatingRealToComplex: {
APFloat &Real = Result.FloatReal;
if (!EvaluateFloat(E->getSubExpr(), Real, Info))
return false;
Result.makeComplexFloat();
Result.FloatImag = APFloat(Real.getSemantics());
return true;
}
case CK_FloatingComplexCast: {
if (!Visit(E->getSubExpr()))
return false;
QualType To = E->getType()->getAs<ComplexType>()->getElementType();
QualType From
= E->getSubExpr()->getType()->getAs<ComplexType>()->getElementType();
return HandleFloatToFloatCast(Info, E, From, To, Result.FloatReal) &&
HandleFloatToFloatCast(Info, E, From, To, Result.FloatImag);
}
case CK_FloatingComplexToIntegralComplex: {
if (!Visit(E->getSubExpr()))
return false;
QualType To = E->getType()->getAs<ComplexType>()->getElementType();
QualType From
= E->getSubExpr()->getType()->getAs<ComplexType>()->getElementType();
Result.makeComplexInt();
return HandleFloatToIntCast(Info, E, From, Result.FloatReal,
To, Result.IntReal) &&
HandleFloatToIntCast(Info, E, From, Result.FloatImag,
To, Result.IntImag);
}
case CK_IntegralRealToComplex: {
APSInt &Real = Result.IntReal;
if (!EvaluateInteger(E->getSubExpr(), Real, Info))
return false;
Result.makeComplexInt();
Result.IntImag = APSInt(Real.getBitWidth(), !Real.isSigned());
return true;
}
case CK_IntegralComplexCast: {
if (!Visit(E->getSubExpr()))
return false;
QualType To = E->getType()->getAs<ComplexType>()->getElementType();
QualType From
= E->getSubExpr()->getType()->getAs<ComplexType>()->getElementType();
Result.IntReal = HandleIntToIntCast(Info, E, To, From, Result.IntReal);
Result.IntImag = HandleIntToIntCast(Info, E, To, From, Result.IntImag);
return true;
}
case CK_IntegralComplexToFloatingComplex: {
if (!Visit(E->getSubExpr()))
return false;
QualType To = E->getType()->castAs<ComplexType>()->getElementType();
QualType From
= E->getSubExpr()->getType()->castAs<ComplexType>()->getElementType();
Result.makeComplexFloat();
return HandleIntToFloatCast(Info, E, From, Result.IntReal,
To, Result.FloatReal) &&
HandleIntToFloatCast(Info, E, From, Result.IntImag,
To, Result.FloatImag);
}
}
llvm_unreachable("unknown cast resulting in complex value");
}
bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma)
return ExprEvaluatorBaseTy::VisitBinaryOperator(E);
bool LHSOK = Visit(E->getLHS());
if (!LHSOK && !Info.keepEvaluatingAfterFailure())
return false;
ComplexValue RHS;
if (!EvaluateComplex(E->getRHS(), RHS, Info) || !LHSOK)
return false;
assert(Result.isComplexFloat() == RHS.isComplexFloat() &&
"Invalid operands to binary operator.");
switch (E->getOpcode()) {
default: return Error(E);
case BO_Add:
if (Result.isComplexFloat()) {
Result.getComplexFloatReal().add(RHS.getComplexFloatReal(),
APFloat::rmNearestTiesToEven);
Result.getComplexFloatImag().add(RHS.getComplexFloatImag(),
APFloat::rmNearestTiesToEven);
} else {
Result.getComplexIntReal() += RHS.getComplexIntReal();
Result.getComplexIntImag() += RHS.getComplexIntImag();
}
break;
case BO_Sub:
if (Result.isComplexFloat()) {
Result.getComplexFloatReal().subtract(RHS.getComplexFloatReal(),
APFloat::rmNearestTiesToEven);
Result.getComplexFloatImag().subtract(RHS.getComplexFloatImag(),
APFloat::rmNearestTiesToEven);
} else {
Result.getComplexIntReal() -= RHS.getComplexIntReal();
Result.getComplexIntImag() -= RHS.getComplexIntImag();
}
break;
case BO_Mul:
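// Complex multiplication: (a + bi) * (c + di) = (ac - bd) + (ad + bc)i.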
if (Result.isComplexFloat()) {
ComplexValue LHS = Result;
APFloat &LHS_r = LHS.getComplexFloatReal();
APFloat &LHS_i = LHS.getComplexFloatImag();
APFloat &RHS_r = RHS.getComplexFloatReal();
APFloat &RHS_i = RHS.getComplexFloatImag();
APFloat Tmp = LHS_r;
Tmp.multiply(RHS_r, APFloat::rmNearestTiesToEven);
Result.getComplexFloatReal() = Tmp;
Tmp = LHS_i;
Tmp.multiply(RHS_i, APFloat::rmNearestTiesToEven);
Result.getComplexFloatReal().subtract(Tmp, APFloat::rmNearestTiesToEven);
Tmp = LHS_r;
Tmp.multiply(RHS_i, APFloat::rmNearestTiesToEven);
Result.getComplexFloatImag() = Tmp;
Tmp = LHS_i;
Tmp.multiply(RHS_r, APFloat::rmNearestTiesToEven);
Result.getComplexFloatImag().add(Tmp, APFloat::rmNearestTiesToEven);
} else {
ComplexValue LHS = Result;
Result.getComplexIntReal() =
(LHS.getComplexIntReal() * RHS.getComplexIntReal() -
LHS.getComplexIntImag() * RHS.getComplexIntImag());
Result.getComplexIntImag() =
(LHS.getComplexIntReal() * RHS.getComplexIntImag() +
LHS.getComplexIntImag() * RHS.getComplexIntReal());
}
break;
case BO_Div:
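// Complex division: (a + bi) / (c + di)
//   = ((ac + bd) + (bc - ad)i) / (c*c + d*d).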
if (Result.isComplexFloat()) {
ComplexValue LHS = Result;
APFloat &LHS_r = LHS.getComplexFloatReal();
APFloat &LHS_i = LHS.getComplexFloatImag();
APFloat &RHS_r = RHS.getComplexFloatReal();
APFloat &RHS_i = RHS.getComplexFloatImag();
APFloat &Res_r = Result.getComplexFloatReal();
APFloat &Res_i = Result.getComplexFloatImag();
APFloat Den = RHS_r;
Den.multiply(RHS_r, APFloat::rmNearestTiesToEven);
APFloat Tmp = RHS_i;
Tmp.multiply(RHS_i, APFloat::rmNearestTiesToEven);
Den.add(Tmp, APFloat::rmNearestTiesToEven);
Res_r = LHS_r;
Res_r.multiply(RHS_r, APFloat::rmNearestTiesToEven);
Tmp = LHS_i;
Tmp.multiply(RHS_i, APFloat::rmNearestTiesToEven);
Res_r.add(Tmp, APFloat::rmNearestTiesToEven);
Res_r.divide(Den, APFloat::rmNearestTiesToEven);
Res_i = LHS_i;
Res_i.multiply(RHS_r, APFloat::rmNearestTiesToEven);
Tmp = LHS_r;
Tmp.multiply(RHS_i, APFloat::rmNearestTiesToEven);
Res_i.subtract(Tmp, APFloat::rmNearestTiesToEven);
Res_i.divide(Den, APFloat::rmNearestTiesToEven);
} else {
if (RHS.getComplexIntReal() == 0 && RHS.getComplexIntImag() == 0)
return Error(E, diag::note_expr_divide_by_zero);
ComplexValue LHS = Result;
APSInt Den = RHS.getComplexIntReal() * RHS.getComplexIntReal() +
RHS.getComplexIntImag() * RHS.getComplexIntImag();
Result.getComplexIntReal() =
(LHS.getComplexIntReal() * RHS.getComplexIntReal() +
LHS.getComplexIntImag() * RHS.getComplexIntImag()) / Den;
Result.getComplexIntImag() =
(LHS.getComplexIntImag() * RHS.getComplexIntReal() -
LHS.getComplexIntReal() * RHS.getComplexIntImag()) / Den;
}
break;
}
return true;
}
bool ComplexExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) {
// Get the operand value into 'Result'.
if (!Visit(E->getSubExpr()))
return false;
switch (E->getOpcode()) {
default:
return Error(E);
case UO_Extension:
return true;
case UO_Plus:
// The result is always just the subexpr.
return true;
case UO_Minus:
if (Result.isComplexFloat()) {
Result.getComplexFloatReal().changeSign();
Result.getComplexFloatImag().changeSign();
}
else {
Result.getComplexIntReal() = -Result.getComplexIntReal();
Result.getComplexIntImag() = -Result.getComplexIntImag();
}
return true;
case UO_Not:
if (Result.isComplexFloat())
Result.getComplexFloatImag().changeSign();
else
Result.getComplexIntImag() = -Result.getComplexIntImag();
return true;
}
}
bool ComplexExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
if (E->getNumInits() == 2) {
if (E->getType()->isComplexType()) {
Result.makeComplexFloat();
if (!EvaluateFloat(E->getInit(0), Result.FloatReal, Info))
return false;
if (!EvaluateFloat(E->getInit(1), Result.FloatImag, Info))
return false;
} else {
Result.makeComplexInt();
if (!EvaluateInteger(E->getInit(0), Result.IntReal, Info))
return false;
if (!EvaluateInteger(E->getInit(1), Result.IntImag, Info))
return false;
}
return true;
}
return ExprEvaluatorBaseTy::VisitInitListExpr(E);
}
//===----------------------------------------------------------------------===//
// Atomic expression evaluation, essentially just handling the NonAtomicToAtomic
// implicit conversion.
//===----------------------------------------------------------------------===//
namespace {
class AtomicExprEvaluator :
public ExprEvaluatorBase<AtomicExprEvaluator, bool> {
APValue &Result;
public:
AtomicExprEvaluator(EvalInfo &Info, APValue &Result)
: ExprEvaluatorBaseTy(Info), Result(Result) {}
bool Success(const APValue &V, const Expr *E) {
Result = V;
return true;
}
bool ZeroInitialization(const Expr *E) {
ImplicitValueInitExpr VIE(
E->getType()->castAs<AtomicType>()->getValueType());
return Evaluate(Result, Info, &VIE);
}
bool VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_NonAtomicToAtomic:
return Evaluate(Result, Info, E->getSubExpr());
}
}
};
} // end anonymous namespace
static bool EvaluateAtomic(const Expr *E, APValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isAtomicType());
return AtomicExprEvaluator(Info, Result).Visit(E);
}
//===----------------------------------------------------------------------===//
// Void expression evaluation, primarily for a cast to void on the LHS of a
// comma operator
//===----------------------------------------------------------------------===//
namespace {
class VoidExprEvaluator
: public ExprEvaluatorBase<VoidExprEvaluator, bool> {
public:
VoidExprEvaluator(EvalInfo &Info) : ExprEvaluatorBaseTy(Info) {}
bool Success(const APValue &V, const Expr *e) { return true; }
bool VisitCastExpr(const CastExpr *E) {
switch (E->getCastKind()) {
default:
return ExprEvaluatorBaseTy::VisitCastExpr(E);
case CK_ToVoid:
VisitIgnoredValue(E->getSubExpr());
return true;
}
}
};
} // end anonymous namespace
static bool EvaluateVoid(const Expr *E, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isVoidType());
return VoidExprEvaluator(Info).Visit(E);
}
//===----------------------------------------------------------------------===//
// Top level Expr::EvaluateAsRValue method.
//===----------------------------------------------------------------------===//
static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) {
// In C, function designators are not lvalues, but we evaluate them as if they
// are.
QualType T = E->getType();
if (E->isGLValue() || T->isFunctionType()) {
LValue LV;
if (!EvaluateLValue(E, LV, Info))
return false;
LV.moveInto(Result);
} else if (T->isVectorType()) {
if (!EvaluateVector(E, Result, Info))
return false;
} else if (T->isIntegralOrEnumerationType()) {
if (!IntExprEvaluator(Info, Result).Visit(E))
return false;
} else if (T->hasPointerRepresentation()) {
LValue LV;
if (!EvaluatePointer(E, LV, Info))
return false;
LV.moveInto(Result);
} else if (T->isRealFloatingType()) {
llvm::APFloat F(0.0);
if (!EvaluateFloat(E, F, Info))
return false;
Result = APValue(F);
} else if (T->isAnyComplexType()) {
ComplexValue C;
if (!EvaluateComplex(E, C, Info))
return false;
C.moveInto(Result);
} else if (T->isMemberPointerType()) {
MemberPtr P;
if (!EvaluateMemberPointer(E, P, Info))
return false;
P.moveInto(Result);
return true;
} else if (T->isArrayType()) {
LValue LV;
LV.set(E, Info.CurrentCall->Index);
APValue &Value = Info.CurrentCall->createTemporary(E, false);
if (!EvaluateArray(E, LV, Value, Info))
return false;
Result = Value;
} else if (T->isRecordType()) {
LValue LV;
LV.set(E, Info.CurrentCall->Index);
APValue &Value = Info.CurrentCall->createTemporary(E, false);
if (!EvaluateRecord(E, LV, Value, Info))
return false;
Result = Value;
} else if (T->isVoidType()) {
if (!Info.getLangOpts().CPlusPlus11)
Info.CCEDiag(E, diag::note_constexpr_nonliteral)
<< E->getType();
if (!EvaluateVoid(E, Info))
return false;
} else if (T->isAtomicType()) {
if (!EvaluateAtomic(E, Result, Info))
return false;
} else if (Info.getLangOpts().CPlusPlus11) {
Info.Diag(E, diag::note_constexpr_nonliteral) << E->getType();
return false;
} else {
Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
return true;
}
/// EvaluateInPlace - Evaluate an expression in-place in an APValue. In some
/// cases, the in-place evaluation is essential, since later initializers for
/// an object can indirectly refer to subobjects which were initialized earlier.
static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This,
const Expr *E, bool AllowNonLiteralTypes) {
if (!AllowNonLiteralTypes && !CheckLiteralType(Info, E, &This))
return false;
if (E->isRValue()) {
// Evaluate arrays and record types in-place, so that later initializers can
// refer to earlier-initialized members of the object.
if (E->getType()->isArrayType())
return EvaluateArray(E, This, Result, Info);
else if (E->getType()->isRecordType())
return EvaluateRecord(E, This, Result, Info);
}
// For any other type, in-place evaluation is unimportant.
return Evaluate(Result, Info, E);
}
/// EvaluateAsRValue - Try to evaluate this expression, performing an implicit
/// lvalue-to-rvalue cast if it is an lvalue.
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) {
if (!CheckLiteralType(Info, E))
return false;
if (!::Evaluate(Result, Info, E))
return false;
if (E->isGLValue()) {
LValue LV;
LV.setFrom(Info.Ctx, Result);
if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result))
return false;
}
// Check this core constant expression is a constant expression.
return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result);
}
static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result,
const ASTContext &Ctx, bool &IsConst) {
// Fast-path evaluations of integer literals, since we sometimes see files
// containing vast quantities of these.
if (const IntegerLiteral *L = dyn_cast<IntegerLiteral>(Exp)) {
Result.Val = APValue(APSInt(L->getValue(),
L->getType()->isUnsignedIntegerType()));
IsConst = true;
return true;
}
// FIXME: Evaluating values of large array and record types can cause
// performance problems. Only do so in C++11 for now.
if (Exp->isRValue() && (Exp->getType()->isArrayType() ||
Exp->getType()->isRecordType()) &&
!Ctx.getLangOpts().CPlusPlus11) {
IsConst = false;
return true;
}
return false;
}
/// EvaluateAsRValue - Return true if this is a constant which we can fold using
/// any crazy technique (that has nothing to do with language standards) that
/// we want to. If this function returns true, it returns the folded constant
/// in Result. If this expression is a glvalue, an lvalue-to-rvalue conversion
/// will be applied to the result.
bool Expr::EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx) const {
bool IsConst;
if (FastEvaluateAsRValue(this, Result, Ctx, IsConst))
return IsConst;
EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects);
return ::EvaluateAsRValue(Info, this, Result.Val);
}
bool Expr::EvaluateAsBooleanCondition(bool &Result,
const ASTContext &Ctx) const {
EvalResult Scratch;
return EvaluateAsRValue(Scratch, Ctx) &&
HandleConversionToBool(Scratch.Val, Result);
}
bool Expr::EvaluateAsInt(APSInt &Result, const ASTContext &Ctx,
SideEffectsKind AllowSideEffects) const {
if (!getType()->isIntegralOrEnumerationType())
return false;
EvalResult ExprResult;
if (!EvaluateAsRValue(ExprResult, Ctx) || !ExprResult.Val.isInt() ||
(!AllowSideEffects && ExprResult.HasSideEffects))
return false;
Result = ExprResult.Val.getInt();
return true;
}
bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx) const {
EvalInfo Info(Ctx, Result, EvalInfo::EM_ConstantFold);
LValue LV;
if (!EvaluateLValue(this, LV, Info) || Result.HasSideEffects ||
!CheckLValueConstantExpression(Info, getExprLoc(),
Ctx.getLValueReferenceType(getType()), LV))
return false;
LV.moveInto(Result.Val);
return true;
}
bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx,
const VarDecl *VD,
SmallVectorImpl<PartialDiagnosticAt> &Notes) const {
// FIXME: Evaluating initializers for large array and record types can cause
// performance problems. Only do so in C++11 for now.
if (isRValue() && (getType()->isArrayType() || getType()->isRecordType()) &&
!Ctx.getLangOpts().CPlusPlus11)
return false;
Expr::EvalStatus EStatus;
EStatus.Diag = &Notes;
EvalInfo InitInfo(Ctx, EStatus, EvalInfo::EM_ConstantFold);
InitInfo.setEvaluatingDecl(VD, Value);
LValue LVal;
LVal.set(VD);
// C++11 [basic.start.init]p2:
// Variables with static storage duration or thread storage duration shall be
// zero-initialized before any other initialization takes place.
// This behavior is not present in C.
if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() &&
!VD->getType()->isReferenceType()) {
ImplicitValueInitExpr VIE(VD->getType());
if (!EvaluateInPlace(Value, InitInfo, LVal, &VIE,
/*AllowNonLiteralTypes=*/true))
return false;
}
if (!EvaluateInPlace(Value, InitInfo, LVal, this,
/*AllowNonLiteralTypes=*/true) ||
EStatus.HasSideEffects)
return false;
return CheckConstantExpression(InitInfo, VD->getLocation(), VD->getType(),
Value);
}
/// isEvaluatable - Call EvaluateAsRValue to see if this expression can be
/// constant folded, but discard the result.
bool Expr::isEvaluatable(const ASTContext &Ctx) const {
EvalResult Result;
return EvaluateAsRValue(Result, Ctx) && !Result.HasSideEffects;
}
APSInt Expr::EvaluateKnownConstInt(const ASTContext &Ctx,
SmallVectorImpl<PartialDiagnosticAt> *Diag) const {
EvalResult EvalResult;
EvalResult.Diag = Diag;
bool Result = EvaluateAsRValue(EvalResult, Ctx);
(void)Result;
assert(Result && "Could not evaluate expression");
assert(EvalResult.Val.isInt() && "Expression did not evaluate to integer");
return EvalResult.Val.getInt();
}
void Expr::EvaluateForOverflow(const ASTContext &Ctx) const {
bool IsConst;
EvalResult EvalResult;
if (!FastEvaluateAsRValue(this, EvalResult, Ctx, IsConst)) {
EvalInfo Info(Ctx, EvalResult, EvalInfo::EM_EvaluateForOverflow);
(void)::EvaluateAsRValue(Info, this, EvalResult.Val);
}
}
bool Expr::EvalResult::isGlobalLValue() const {
assert(Val.isLValue());
return IsGlobalLValue(Val.getLValueBase());
}
/// isIntegerConstantExpr - This recursive routine tests whether an expression
/// is an integer constant expression.
/// FIXME: Pass up a reason why! Invalid operation in i-c-e, division by zero,
/// comma, etc.
// CheckICE - This function does the fundamental ICE checking: the returned
// ICEDiag contains an ICEKind indicating whether the expression is an ICE,
// and a (possibly null) SourceLocation indicating the location of the problem.
//
// Note that to reduce code duplication, this helper does no evaluation
// itself; the caller checks whether the expression is evaluatable, and
// in the rare cases where CheckICE actually cares about the evaluated
// value, it calls into Evaluate.
namespace {
enum ICEKind {
/// This expression is an ICE.
IK_ICE,
/// This expression is not an ICE, but if it isn't evaluated, it's
/// a legal subexpression for an ICE. This return value is used to handle
/// the comma operator in C99 mode, and non-constant subexpressions.
IK_ICEIfUnevaluated,
/// This expression is not an ICE, and is not a legal subexpression for one.
IK_NotICE
};
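// Illustrative examples of the three kinds, under C99 rules:
//   IK_ICE:              1 + 2, sizeof(int)
//   IK_ICEIfUnevaluated: (1, 2), which may only appear in an unevaluated
//                        operand such as the unselected arm of ?:
//   IK_NotICE:           a call f(), or an lvalue such as *p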
struct ICEDiag {
ICEKind Kind;
SourceLocation Loc;
ICEDiag(ICEKind IK, SourceLocation l) : Kind(IK), Loc(l) {}
};
}
static ICEDiag NoDiag() { return ICEDiag(IK_ICE, SourceLocation()); }
static ICEDiag Worst(ICEDiag A, ICEDiag B) { return A.Kind >= B.Kind ? A : B; }
static ICEDiag CheckEvalInICE(const Expr* E, const ASTContext &Ctx) {
Expr::EvalResult EVResult;
if (!E->EvaluateAsRValue(EVResult, Ctx) || EVResult.HasSideEffects ||
!EVResult.Val.isInt())
return ICEDiag(IK_NotICE, E->getLocStart());
return NoDiag();
}
static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
assert(!E->isValueDependent() && "Should not see value dependent exprs!");
if (!E->getType()->isIntegralOrEnumerationType())
return ICEDiag(IK_NotICE, E->getLocStart());
switch (E->getStmtClass()) {
#define ABSTRACT_STMT(Node)
#define STMT(Node, Base) case Expr::Node##Class:
#define EXPR(Node, Base)
#include "clang/AST/StmtNodes.inc"
case Expr::PredefinedExprClass:
case Expr::FloatingLiteralClass:
case Expr::ImaginaryLiteralClass:
case Expr::StringLiteralClass:
case Expr::ArraySubscriptExprClass:
case Expr::MemberExprClass:
case Expr::CompoundAssignOperatorClass:
case Expr::CompoundLiteralExprClass:
case Expr::ExtVectorElementExprClass:
case Expr::DesignatedInitExprClass:
case Expr::ImplicitValueInitExprClass:
case Expr::ParenListExprClass:
case Expr::VAArgExprClass:
case Expr::AddrLabelExprClass:
case Expr::StmtExprClass:
case Expr::CXXMemberCallExprClass:
case Expr::CUDAKernelCallExprClass:
case Expr::CXXDynamicCastExprClass:
case Expr::CXXTypeidExprClass:
case Expr::CXXUuidofExprClass:
case Expr::MSPropertyRefExprClass:
case Expr::CXXNullPtrLiteralExprClass:
case Expr::UserDefinedLiteralClass:
case Expr::CXXThisExprClass:
case Expr::CXXThrowExprClass:
case Expr::CXXNewExprClass:
case Expr::CXXDeleteExprClass:
case Expr::CXXPseudoDestructorExprClass:
case Expr::UnresolvedLookupExprClass:
case Expr::DependentScopeDeclRefExprClass:
case Expr::CXXConstructExprClass:
case Expr::CXXStdInitializerListExprClass:
case Expr::CXXBindTemporaryExprClass:
case Expr::ExprWithCleanupsClass:
case Expr::CXXTemporaryObjectExprClass:
case Expr::CXXUnresolvedConstructExprClass:
case Expr::CXXDependentScopeMemberExprClass:
case Expr::UnresolvedMemberExprClass:
case Expr::ObjCStringLiteralClass:
case Expr::ObjCBoxedExprClass:
case Expr::ObjCArrayLiteralClass:
case Expr::ObjCDictionaryLiteralClass:
case Expr::ObjCEncodeExprClass:
case Expr::ObjCMessageExprClass:
case Expr::ObjCSelectorExprClass:
case Expr::ObjCProtocolExprClass:
case Expr::ObjCIvarRefExprClass:
case Expr::ObjCPropertyRefExprClass:
case Expr::ObjCSubscriptRefExprClass:
case Expr::ObjCIsaExprClass:
case Expr::ShuffleVectorExprClass:
case Expr::ConvertVectorExprClass:
case Expr::BlockExprClass:
case Expr::NoStmtClass:
case Expr::OpaqueValueExprClass:
case Expr::PackExpansionExprClass:
case Expr::SubstNonTypeTemplateParmPackExprClass:
case Expr::FunctionParmPackExprClass:
case Expr::AsTypeExprClass:
case Expr::ObjCIndirectCopyRestoreExprClass:
case Expr::MaterializeTemporaryExprClass:
case Expr::PseudoObjectExprClass:
case Expr::AtomicExprClass:
case Expr::InitListExprClass:
case Expr::LambdaExprClass:
return ICEDiag(IK_NotICE, E->getLocStart());
case Expr::SizeOfPackExprClass:
case Expr::GNUNullExprClass:
// GCC considers the GNU __null value to be an integral constant expression.
return NoDiag();
case Expr::SubstNonTypeTemplateParmExprClass:
return
CheckICE(cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement(), Ctx);
case Expr::ParenExprClass:
return CheckICE(cast<ParenExpr>(E)->getSubExpr(), Ctx);
case Expr::GenericSelectionExprClass:
return CheckICE(cast<GenericSelectionExpr>(E)->getResultExpr(), Ctx);
case Expr::IntegerLiteralClass:
case Expr::CharacterLiteralClass:
case Expr::ObjCBoolLiteralExprClass:
case Expr::CXXBoolLiteralExprClass:
case Expr::CXXScalarValueInitExprClass:
case Expr::UnaryTypeTraitExprClass:
case Expr::BinaryTypeTraitExprClass:
case Expr::TypeTraitExprClass:
case Expr::ArrayTypeTraitExprClass:
case Expr::ExpressionTraitExprClass:
case Expr::CXXNoexceptExprClass:
return NoDiag();
case Expr::CallExprClass:
case Expr::CXXOperatorCallExprClass: {
// C99 6.6/3 allows function calls within unevaluated subexpressions of
// constant expressions, but they can never be ICEs because an ICE cannot
// contain an operand of (pointer to) function type.
const CallExpr *CE = cast<CallExpr>(E);
if (CE->isBuiltinCall())
return CheckEvalInICE(E, Ctx);
return ICEDiag(IK_NotICE, E->getLocStart());
}
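// For example, __builtin_constant_p(0) folds to an integer and is accepted
// above, while an ordinary call such as f() is rejected even if f is pure.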
case Expr::DeclRefExprClass: {
if (isa<EnumConstantDecl>(cast<DeclRefExpr>(E)->getDecl()))
return NoDiag();
const ValueDecl *D = dyn_cast<ValueDecl>(cast<DeclRefExpr>(E)->getDecl());
if (Ctx.getLangOpts().CPlusPlus &&
D && IsConstNonVolatile(D->getType())) {
// Parameter variables are never constants. Without this check,
// getAnyInitializer() can find a default argument, which leads
// to chaos.
if (isa<ParmVarDecl>(D))
return ICEDiag(IK_NotICE, cast<DeclRefExpr>(E)->getLocation());
// C++ 7.1.5.1p2
// A variable of non-volatile const-qualified integral or enumeration
// type initialized by an ICE can be used in ICEs.
if (const VarDecl *Dcl = dyn_cast<VarDecl>(D)) {
if (!Dcl->getType()->isIntegralOrEnumerationType())
return ICEDiag(IK_NotICE, cast<DeclRefExpr>(E)->getLocation());
const VarDecl *VD;
// Look for a declaration of this variable that has an initializer, and
// check whether it is an ICE.
if (Dcl->getAnyInitializer(VD) && VD->checkInitIsICE())
return NoDiag();
else
return ICEDiag(IK_NotICE, cast<DeclRefExpr>(E)->getLocation());
}
}
return ICEDiag(IK_NotICE, E->getLocStart());
}
case Expr::UnaryOperatorClass: {
const UnaryOperator *Exp = cast<UnaryOperator>(E);
switch (Exp->getOpcode()) {
case UO_PostInc:
case UO_PostDec:
case UO_PreInc:
case UO_PreDec:
case UO_AddrOf:
case UO_Deref:
// C99 6.6/3 allows increment and decrement within unevaluated
// subexpressions of constant expressions, but they can never be ICEs
// because an ICE cannot contain an lvalue operand.
return ICEDiag(IK_NotICE, E->getLocStart());
case UO_Extension:
case UO_LNot:
case UO_Plus:
case UO_Minus:
case UO_Not:
case UO_Real:
case UO_Imag:
return CheckICE(Exp->getSubExpr(), Ctx);
}
// OffsetOf falls through here.
}
case Expr::OffsetOfExprClass: {
// Note that per C99, offsetof must be an ICE. And AFAIK, using
// EvaluateAsRValue matches the proposed gcc behavior for cases like
// "offsetof(struct s{int x[4];}, x[1.0])". This doesn't affect
// compliance: we should warn earlier for offsetof expressions with
// array subscripts that aren't ICEs, and if the array subscripts
// are ICEs, the value of the offsetof must be an integer constant.
return CheckEvalInICE(E, Ctx);
}
case Expr::UnaryExprOrTypeTraitExprClass: {
const UnaryExprOrTypeTraitExpr *Exp = cast<UnaryExprOrTypeTraitExpr>(E);
if ((Exp->getKind() == UETT_SizeOf) &&
Exp->getTypeOfArgument()->isVariableArrayType())
return ICEDiag(IK_NotICE, E->getLocStart());
return NoDiag();
}
case Expr::BinaryOperatorClass: {
const BinaryOperator *Exp = cast<BinaryOperator>(E);
switch (Exp->getOpcode()) {
case BO_PtrMemD:
case BO_PtrMemI:
case BO_Assign:
case BO_MulAssign:
case BO_DivAssign:
case BO_RemAssign:
case BO_AddAssign:
case BO_SubAssign:
case BO_ShlAssign:
case BO_ShrAssign:
case BO_AndAssign:
case BO_XorAssign:
case BO_OrAssign:
// C99 6.6/3 allows assignments within unevaluated subexpressions of
// constant expressions, but they can never be ICEs because an ICE cannot
// contain an lvalue operand.
return ICEDiag(IK_NotICE, E->getLocStart());
case BO_Mul:
case BO_Div:
case BO_Rem:
case BO_Add:
case BO_Sub:
case BO_Shl:
case BO_Shr:
case BO_LT:
case BO_GT:
case BO_LE:
case BO_GE:
case BO_EQ:
case BO_NE:
case BO_And:
case BO_Xor:
case BO_Or:
case BO_Comma: {
ICEDiag LHSResult = CheckICE(Exp->getLHS(), Ctx);
ICEDiag RHSResult = CheckICE(Exp->getRHS(), Ctx);
if (Exp->getOpcode() == BO_Div ||
Exp->getOpcode() == BO_Rem) {
// EvaluateAsRValue gives an error for undefined Div/Rem, so make sure
// we don't evaluate one.
if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICE) {
llvm::APSInt REval = Exp->getRHS()->EvaluateKnownConstInt(Ctx);
if (REval == 0)
return ICEDiag(IK_ICEIfUnevaluated, E->getLocStart());
if (REval.isSigned() && REval.isAllOnesValue()) {
llvm::APSInt LEval = Exp->getLHS()->EvaluateKnownConstInt(Ctx);
if (LEval.isMinSignedValue())
return ICEDiag(IK_ICEIfUnevaluated, E->getLocStart());
}
}
}
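// For example, "1 / 0" and "INT_MIN / -1" are both treated as
// IK_ICEIfUnevaluated here: legal in an unevaluated operand, but
// evaluating them would trigger the evaluator's undefined-behavior
// diagnostics.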
if (Exp->getOpcode() == BO_Comma) {
if (Ctx.getLangOpts().C99) {
// C99 6.6p3 introduces a strange edge case: comma can be in an ICE
// if it isn't evaluated.
if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICE)
return ICEDiag(IK_ICEIfUnevaluated, E->getLocStart());
} else {
// In both C89 and C++, commas in ICEs are illegal.
return ICEDiag(IK_NotICE, E->getLocStart());
}
}
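// For example, "(1, 2)" is IK_ICEIfUnevaluated under C99 but IK_NotICE
// under C89 and C++.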
return Worst(LHSResult, RHSResult);
}
case BO_LAnd:
case BO_LOr: {
ICEDiag LHSResult = CheckICE(Exp->getLHS(), Ctx);
ICEDiag RHSResult = CheckICE(Exp->getRHS(), Ctx);
if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICEIfUnevaluated) {
// Rare case where the RHS has a comma "side-effect"; we need
// to actually check the condition to see whether the side
// with the comma is evaluated.
if ((Exp->getOpcode() == BO_LAnd) !=
(Exp->getLHS()->EvaluateKnownConstInt(Ctx) == 0))
return RHSResult;
return NoDiag();
}
return Worst(LHSResult, RHSResult);
}
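// For example, "0 && (1, 2)" is an ICE in C99 because the comma operand
// is never evaluated, whereas "1 && (1, 2)" is an ICE only if the whole
// expression is itself unevaluated.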
}
}
case Expr::ImplicitCastExprClass:
case Expr::CStyleCastExprClass:
case Expr::CXXFunctionalCastExprClass:
case Expr::CXXStaticCastExprClass:
case Expr::CXXReinterpretCastExprClass:
case Expr::CXXConstCastExprClass:
case Expr::ObjCBridgedCastExprClass: {
const Expr *SubExpr = cast<CastExpr>(E)->getSubExpr();
if (isa<ExplicitCastExpr>(E)) {
if (const FloatingLiteral *FL
= dyn_cast<FloatingLiteral>(SubExpr->IgnoreParenImpCasts())) {
unsigned DestWidth = Ctx.getIntWidth(E->getType());
bool DestSigned = E->getType()->isSignedIntegerOrEnumerationType();
APSInt IgnoredVal(DestWidth, !DestSigned);
bool Ignored;
// If the value does not fit in the destination type, the behavior is
// undefined, so we are not required to treat it as a constant
// expression.
if (FL->getValue().convertToInteger(IgnoredVal,
llvm::APFloat::rmTowardZero,
&Ignored) & APFloat::opInvalidOp)
return ICEDiag(IK_NotICE, E->getLocStart());
return NoDiag();
}
}
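// For example, "(int)3.5" is accepted as an ICE, while "(int)1e100" is
// not, because the converted value cannot be represented in 'int'.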
switch (cast<CastExpr>(E)->getCastKind()) {
case CK_LValueToRValue:
case CK_AtomicToNonAtomic:
case CK_NonAtomicToAtomic:
case CK_NoOp:
case CK_IntegralToBoolean:
case CK_IntegralCast:
return CheckICE(SubExpr, Ctx);
default:
return ICEDiag(IK_NotICE, E->getLocStart());
}
}
case Expr::BinaryConditionalOperatorClass: {
const BinaryConditionalOperator *Exp = cast<BinaryConditionalOperator>(E);
ICEDiag CommonResult = CheckICE(Exp->getCommon(), Ctx);
if (CommonResult.Kind == IK_NotICE) return CommonResult;
ICEDiag FalseResult = CheckICE(Exp->getFalseExpr(), Ctx);
if (FalseResult.Kind == IK_NotICE) return FalseResult;
if (CommonResult.Kind == IK_ICEIfUnevaluated) return CommonResult;
if (FalseResult.Kind == IK_ICEIfUnevaluated &&
Exp->getCommon()->EvaluateKnownConstInt(Ctx) != 0) return NoDiag();
return FalseResult;
}
case Expr::ConditionalOperatorClass: {
const ConditionalOperator *Exp = cast<ConditionalOperator>(E);
// If the condition (ignoring parens) is a __builtin_constant_p call,
// then only the true side is actually considered in an integer constant
// expression, and it is fully evaluated. This is an important GNU
// extension. See GCC PR38377 for discussion.
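// For example, "__builtin_constant_p(1) ? 1 : (1, 2)" is fully evaluated
// here and accepted, even though the false arm would not be an ICE on
// its own.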
if (const CallExpr *CallCE
= dyn_cast<CallExpr>(Exp->getCond()->IgnoreParenCasts()))
if (CallCE->isBuiltinCall() == Builtin::BI__builtin_constant_p)
return CheckEvalInICE(E, Ctx);
ICEDiag CondResult = CheckICE(Exp->getCond(), Ctx);
if (CondResult.Kind == IK_NotICE)
return CondResult;
ICEDiag TrueResult = CheckICE(Exp->getTrueExpr(), Ctx);
ICEDiag FalseResult = CheckICE(Exp->getFalseExpr(), Ctx);
if (TrueResult.Kind == IK_NotICE)
return TrueResult;
if (FalseResult.Kind == IK_NotICE)
return FalseResult;
if (CondResult.Kind == IK_ICEIfUnevaluated)
return CondResult;
if (TrueResult.Kind == IK_ICE && FalseResult.Kind == IK_ICE)
return NoDiag();
// Rare case where the diagnostics depend on which side is evaluated.
// Note that if we get here, CondResult is IK_ICE, and at least one of
// TrueResult and FalseResult is not.
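// For example, with C99 rules "0 ? (1, 2) : 3" is an ICE because only
// the false arm is evaluated, while "1 ? (1, 2) : 3" is not.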
if (Exp->getCond()->EvaluateKnownConstInt(Ctx) == 0)
return FalseResult;
return TrueResult;
}
case Expr::CXXDefaultArgExprClass:
return CheckICE(cast<CXXDefaultArgExpr>(E)->getExpr(), Ctx);
case Expr::CXXDefaultInitExprClass:
return CheckICE(cast<CXXDefaultInitExpr>(E)->getExpr(), Ctx);
case Expr::ChooseExprClass: {
return CheckICE(cast<ChooseExpr>(E)->getChosenSubExpr(), Ctx);
}
}
llvm_unreachable("Invalid StmtClass!");
}
/// Evaluate an expression as a C++11 integral constant expression.
static bool EvaluateCPlusPlus11IntegralConstantExpr(const ASTContext &Ctx,
const Expr *E,
llvm::APSInt *Value,
SourceLocation *Loc) {
if (!E->getType()->isIntegralOrEnumerationType()) {
if (Loc) *Loc = E->getExprLoc();
return false;
}
APValue Result;
if (!E->isCXX11ConstantExpr(Ctx, &Result, Loc))
return false;
assert(Result.isInt() && "pointer cast to int is not an ICE");
if (Value) *Value = Result.getInt();
return true;
}
bool Expr::isIntegerConstantExpr(const ASTContext &Ctx,
SourceLocation *Loc) const {
if (Ctx.getLangOpts().CPlusPlus11)
return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, 0, Loc);
ICEDiag D = CheckICE(this, Ctx);
if (D.Kind != IK_ICE) {
if (Loc) *Loc = D.Loc;
return false;
}
return true;
}
bool Expr::isIntegerConstantExpr(llvm::APSInt &Value, const ASTContext &Ctx,
SourceLocation *Loc, bool isEvaluated) const {
if (Ctx.getLangOpts().CPlusPlus11)
return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc);
if (!isIntegerConstantExpr(Ctx, Loc))
return false;
if (!EvaluateAsInt(Value, Ctx))
llvm_unreachable("ICE cannot be evaluated!");
return true;
}
bool Expr::isCXX98IntegralConstantExpr(const ASTContext &Ctx) const {
return CheckICE(this, Ctx).Kind == IK_ICE;
}
bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result,
SourceLocation *Loc) const {
// We support this checking in C++98 mode in order to diagnose compatibility
// issues.
assert(Ctx.getLangOpts().CPlusPlus);
// Build evaluation settings.
Expr::EvalStatus Status;
SmallVector<PartialDiagnosticAt, 8> Diags;
Status.Diag = &Diags;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression);
APValue Scratch;
bool IsConstExpr = ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch);
if (!Diags.empty()) {
IsConstExpr = false;
if (Loc) *Loc = Diags[0].first;
} else if (!IsConstExpr) {
// FIXME: This shouldn't happen.
if (Loc) *Loc = getExprLoc();
}
return IsConstExpr;
}
bool Expr::isPotentialConstantExpr(const FunctionDecl *FD,
SmallVectorImpl<
PartialDiagnosticAt> &Diags) {
// FIXME: It would be useful to check constexpr function templates, but at the
// moment the constant expression evaluator cannot cope with the non-rigorous
// ASTs which we build for dependent expressions.
if (FD->isDependentContext())
return true;
Expr::EvalStatus Status;
Status.Diag = &Diags;
EvalInfo Info(FD->getASTContext(), Status,
EvalInfo::EM_PotentialConstantExpression);
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
const CXXRecordDecl *RD = MD ? MD->getParent()->getCanonicalDecl() : 0;
// Fabricate an arbitrary expression on the stack and pretend that it
// is a temporary being used as the 'this' pointer.
LValue This;
ImplicitValueInitExpr VIE(RD ? Info.Ctx.getRecordType(RD) : Info.Ctx.IntTy);
This.set(&VIE, Info.CurrentCall->Index);
ArrayRef<const Expr*> Args;
SourceLocation Loc = FD->getLocation();
APValue Scratch;
if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) {
// Evaluate the call as a constant initializer, to allow the construction
// of objects of non-literal types.
Info.setEvaluatingDecl(This.getLValueBase(), Scratch);
HandleConstructorCall(Loc, This, Args, CD, Info, Scratch);
} else
HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? &This : 0,
Args, FD->getBody(), Info, Scratch);
return Diags.empty();
}
Index: head/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp (revision 265925)
@@ -1,2011 +1,2013 @@
//===--- StmtPrinter.cpp - Printing implementation for Stmt ASTs ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Stmt::dumpPretty/Stmt::printPretty methods, which
// pretty print the AST back out to C code.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Format.h"
using namespace clang;
//===----------------------------------------------------------------------===//
// StmtPrinter Visitor
//===----------------------------------------------------------------------===//
namespace {
class StmtPrinter : public StmtVisitor<StmtPrinter> {
raw_ostream &OS;
unsigned IndentLevel;
clang::PrinterHelper* Helper;
PrintingPolicy Policy;
public:
StmtPrinter(raw_ostream &os, PrinterHelper* helper,
const PrintingPolicy &Policy,
unsigned Indentation = 0)
: OS(os), IndentLevel(Indentation), Helper(helper), Policy(Policy) {}
void PrintStmt(Stmt *S) {
PrintStmt(S, Policy.Indentation);
}
void PrintStmt(Stmt *S, int SubIndent) {
IndentLevel += SubIndent;
if (S && isa<Expr>(S)) {
// If this is an expr used in a stmt context, indent and newline it.
Indent();
Visit(S);
OS << ";\n";
} else if (S) {
Visit(S);
} else {
Indent() << "<<<NULL STATEMENT>>>\n";
}
IndentLevel -= SubIndent;
}
void PrintRawCompoundStmt(CompoundStmt *S);
void PrintRawDecl(Decl *D);
void PrintRawDeclStmt(const DeclStmt *S);
void PrintRawIfStmt(IfStmt *If);
void PrintRawCXXCatchStmt(CXXCatchStmt *Catch);
void PrintCallArgs(CallExpr *E);
void PrintRawSEHExceptHandler(SEHExceptStmt *S);
void PrintRawSEHFinallyStmt(SEHFinallyStmt *S);
void PrintExpr(Expr *E) {
if (E)
Visit(E);
else
OS << "<null expr>";
}
raw_ostream &Indent(int Delta = 0) {
for (int i = 0, e = IndentLevel+Delta; i < e; ++i)
OS << " ";
return OS;
}
void Visit(Stmt* S) {
if (Helper && Helper->handledStmt(S,OS))
return;
else StmtVisitor<StmtPrinter>::Visit(S);
}
void VisitStmt(Stmt *Node) LLVM_ATTRIBUTE_UNUSED {
Indent() << "<<unknown stmt type>>\n";
}
void VisitExpr(Expr *Node) LLVM_ATTRIBUTE_UNUSED {
OS << "<<unknown expr type>>";
}
void VisitCXXNamedCastExpr(CXXNamedCastExpr *Node);
#define ABSTRACT_STMT(CLASS)
#define STMT(CLASS, PARENT) \
void Visit##CLASS(CLASS *Node);
#include "clang/AST/StmtNodes.inc"
};
}
//===----------------------------------------------------------------------===//
// Stmt printing methods.
//===----------------------------------------------------------------------===//
/// PrintRawCompoundStmt - Print a compound stmt without indenting the {, and
/// with no newline after the }.
void StmtPrinter::PrintRawCompoundStmt(CompoundStmt *Node) {
OS << "{\n";
for (CompoundStmt::body_iterator I = Node->body_begin(), E = Node->body_end();
I != E; ++I)
PrintStmt(*I);
Indent() << "}";
}
void StmtPrinter::PrintRawDecl(Decl *D) {
D->print(OS, Policy, IndentLevel);
}
void StmtPrinter::PrintRawDeclStmt(const DeclStmt *S) {
DeclStmt::const_decl_iterator Begin = S->decl_begin(), End = S->decl_end();
SmallVector<Decl*, 2> Decls;
for ( ; Begin != End; ++Begin)
Decls.push_back(*Begin);
Decl::printGroup(Decls.data(), Decls.size(), OS, Policy, IndentLevel);
}
void StmtPrinter::VisitNullStmt(NullStmt *Node) {
Indent() << ";\n";
}
void StmtPrinter::VisitDeclStmt(DeclStmt *Node) {
Indent();
PrintRawDeclStmt(Node);
OS << ";\n";
}
void StmtPrinter::VisitCompoundStmt(CompoundStmt *Node) {
Indent();
PrintRawCompoundStmt(Node);
OS << "\n";
}
void StmtPrinter::VisitCaseStmt(CaseStmt *Node) {
Indent(-1) << "case ";
PrintExpr(Node->getLHS());
if (Node->getRHS()) {
OS << " ... ";
PrintExpr(Node->getRHS());
}
OS << ":\n";
PrintStmt(Node->getSubStmt(), 0);
}
void StmtPrinter::VisitDefaultStmt(DefaultStmt *Node) {
Indent(-1) << "default:\n";
PrintStmt(Node->getSubStmt(), 0);
}
void StmtPrinter::VisitLabelStmt(LabelStmt *Node) {
Indent(-1) << Node->getName() << ":\n";
PrintStmt(Node->getSubStmt(), 0);
}
void StmtPrinter::VisitAttributedStmt(AttributedStmt *Node) {
OS << "[[";
bool first = true;
for (ArrayRef<const Attr*>::iterator it = Node->getAttrs().begin(),
end = Node->getAttrs().end();
it != end; ++it) {
if (!first) {
OS << ", ";
first = false;
}
// TODO: check this
(*it)->printPretty(OS, Policy);
}
OS << "]] ";
PrintStmt(Node->getSubStmt(), 0);
}
void StmtPrinter::PrintRawIfStmt(IfStmt *If) {
OS << "if (";
if (const DeclStmt *DS = If->getConditionVariableDeclStmt())
PrintRawDeclStmt(DS);
else
PrintExpr(If->getCond());
OS << ')';
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(If->getThen())) {
OS << ' ';
PrintRawCompoundStmt(CS);
OS << (If->getElse() ? ' ' : '\n');
} else {
OS << '\n';
PrintStmt(If->getThen());
if (If->getElse()) Indent();
}
if (Stmt *Else = If->getElse()) {
OS << "else";
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Else)) {
OS << ' ';
PrintRawCompoundStmt(CS);
OS << '\n';
} else if (IfStmt *ElseIf = dyn_cast<IfStmt>(Else)) {
OS << ' ';
PrintRawIfStmt(ElseIf);
} else {
OS << '\n';
PrintStmt(If->getElse());
}
}
}
void StmtPrinter::VisitIfStmt(IfStmt *If) {
Indent();
PrintRawIfStmt(If);
}
void StmtPrinter::VisitSwitchStmt(SwitchStmt *Node) {
Indent() << "switch (";
if (const DeclStmt *DS = Node->getConditionVariableDeclStmt())
PrintRawDeclStmt(DS);
else
PrintExpr(Node->getCond());
OS << ")";
// Pretty print compoundstmt bodies (very common).
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) {
OS << " ";
PrintRawCompoundStmt(CS);
OS << "\n";
} else {
OS << "\n";
PrintStmt(Node->getBody());
}
}
void StmtPrinter::VisitWhileStmt(WhileStmt *Node) {
Indent() << "while (";
if (const DeclStmt *DS = Node->getConditionVariableDeclStmt())
PrintRawDeclStmt(DS);
else
PrintExpr(Node->getCond());
OS << ")\n";
PrintStmt(Node->getBody());
}
void StmtPrinter::VisitDoStmt(DoStmt *Node) {
Indent() << "do ";
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) {
PrintRawCompoundStmt(CS);
OS << " ";
} else {
OS << "\n";
PrintStmt(Node->getBody());
Indent();
}
OS << "while (";
PrintExpr(Node->getCond());
OS << ");\n";
}
void StmtPrinter::VisitForStmt(ForStmt *Node) {
Indent() << "for (";
if (Node->getInit()) {
if (DeclStmt *DS = dyn_cast<DeclStmt>(Node->getInit()))
PrintRawDeclStmt(DS);
else
PrintExpr(cast<Expr>(Node->getInit()));
}
OS << ";";
if (Node->getCond()) {
OS << " ";
PrintExpr(Node->getCond());
}
OS << ";";
if (Node->getInc()) {
OS << " ";
PrintExpr(Node->getInc());
}
OS << ") ";
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) {
PrintRawCompoundStmt(CS);
OS << "\n";
} else {
OS << "\n";
PrintStmt(Node->getBody());
}
}
void StmtPrinter::VisitObjCForCollectionStmt(ObjCForCollectionStmt *Node) {
Indent() << "for (";
if (DeclStmt *DS = dyn_cast<DeclStmt>(Node->getElement()))
PrintRawDeclStmt(DS);
else
PrintExpr(cast<Expr>(Node->getElement()));
OS << " in ";
PrintExpr(Node->getCollection());
OS << ") ";
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) {
PrintRawCompoundStmt(CS);
OS << "\n";
} else {
OS << "\n";
PrintStmt(Node->getBody());
}
}
void StmtPrinter::VisitCXXForRangeStmt(CXXForRangeStmt *Node) {
Indent() << "for (";
PrintingPolicy SubPolicy(Policy);
SubPolicy.SuppressInitializers = true;
Node->getLoopVariable()->print(OS, SubPolicy, IndentLevel);
OS << " : ";
PrintExpr(Node->getRangeInit());
OS << ") {\n";
PrintStmt(Node->getBody());
Indent() << "}\n";
}
void StmtPrinter::VisitMSDependentExistsStmt(MSDependentExistsStmt *Node) {
Indent();
if (Node->isIfExists())
OS << "__if_exists (";
else
OS << "__if_not_exists (";
if (NestedNameSpecifier *Qualifier
= Node->getQualifierLoc().getNestedNameSpecifier())
Qualifier->print(OS, Policy);
OS << Node->getNameInfo() << ") ";
PrintRawCompoundStmt(Node->getSubStmt());
}
void StmtPrinter::VisitGotoStmt(GotoStmt *Node) {
Indent() << "goto " << Node->getLabel()->getName() << ";\n";
}
void StmtPrinter::VisitIndirectGotoStmt(IndirectGotoStmt *Node) {
Indent() << "goto *";
PrintExpr(Node->getTarget());
OS << ";\n";
}
void StmtPrinter::VisitContinueStmt(ContinueStmt *Node) {
Indent() << "continue;\n";
}
void StmtPrinter::VisitBreakStmt(BreakStmt *Node) {
Indent() << "break;\n";
}
void StmtPrinter::VisitReturnStmt(ReturnStmt *Node) {
Indent() << "return";
if (Node->getRetValue()) {
OS << " ";
PrintExpr(Node->getRetValue());
}
OS << ";\n";
}
void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
Indent() << "asm ";
if (Node->isVolatile())
OS << "volatile ";
OS << "(";
VisitStringLiteral(Node->getAsmString());
// Outputs
if (Node->getNumOutputs() != 0 || Node->getNumInputs() != 0 ||
Node->getNumClobbers() != 0)
OS << " : ";
for (unsigned i = 0, e = Node->getNumOutputs(); i != e; ++i) {
if (i != 0)
OS << ", ";
if (!Node->getOutputName(i).empty()) {
OS << '[';
OS << Node->getOutputName(i);
OS << "] ";
}
VisitStringLiteral(Node->getOutputConstraintLiteral(i));
OS << " ";
Visit(Node->getOutputExpr(i));
}
// Inputs
if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0)
OS << " : ";
for (unsigned i = 0, e = Node->getNumInputs(); i != e; ++i) {
if (i != 0)
OS << ", ";
if (!Node->getInputName(i).empty()) {
OS << '[';
OS << Node->getInputName(i);
OS << "] ";
}
VisitStringLiteral(Node->getInputConstraintLiteral(i));
OS << " ";
Visit(Node->getInputExpr(i));
}
// Clobbers
if (Node->getNumClobbers() != 0)
OS << " : ";
for (unsigned i = 0, e = Node->getNumClobbers(); i != e; ++i) {
if (i != 0)
OS << ", ";
VisitStringLiteral(Node->getClobberStringLiteral(i));
}
OS << ");\n";
}
void StmtPrinter::VisitMSAsmStmt(MSAsmStmt *Node) {
// FIXME: Implement MS style inline asm statement printer.
Indent() << "__asm ";
if (Node->hasBraces())
OS << "{\n";
OS << Node->getAsmString() << "\n";
if (Node->hasBraces())
Indent() << "}\n";
}
void StmtPrinter::VisitCapturedStmt(CapturedStmt *Node) {
PrintStmt(Node->getCapturedDecl()->getBody());
}
void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) {
Indent() << "@try";
if (CompoundStmt *TS = dyn_cast<CompoundStmt>(Node->getTryBody())) {
PrintRawCompoundStmt(TS);
OS << "\n";
}
for (unsigned I = 0, N = Node->getNumCatchStmts(); I != N; ++I) {
ObjCAtCatchStmt *catchStmt = Node->getCatchStmt(I);
Indent() << "@catch(";
if (catchStmt->getCatchParamDecl()) {
if (Decl *DS = catchStmt->getCatchParamDecl())
PrintRawDecl(DS);
}
OS << ")";
if (CompoundStmt *CS = dyn_cast<CompoundStmt>(catchStmt->getCatchBody())) {
PrintRawCompoundStmt(CS);
OS << "\n";
}
}
if (ObjCAtFinallyStmt *FS = static_cast<ObjCAtFinallyStmt *>(
Node->getFinallyStmt())) {
Indent() << "@finally";
PrintRawCompoundStmt(dyn_cast<CompoundStmt>(FS->getFinallyBody()));
OS << "\n";
}
}
void StmtPrinter::VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *Node) {
}
void StmtPrinter::VisitObjCAtCatchStmt(ObjCAtCatchStmt *Node) {
Indent() << "@catch (...) { /* todo */ } \n";
}
void StmtPrinter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *Node) {
Indent() << "@throw";
if (Node->getThrowExpr()) {
OS << " ";
PrintExpr(Node->getThrowExpr());
}
OS << ";\n";
}
void StmtPrinter::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *Node) {
Indent() << "@synchronized (";
PrintExpr(Node->getSynchExpr());
OS << ")";
PrintRawCompoundStmt(Node->getSynchBody());
OS << "\n";
}
void StmtPrinter::VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *Node) {
Indent() << "@autoreleasepool";
PrintRawCompoundStmt(dyn_cast<CompoundStmt>(Node->getSubStmt()));
OS << "\n";
}
void StmtPrinter::PrintRawCXXCatchStmt(CXXCatchStmt *Node) {
OS << "catch (";
if (Decl *ExDecl = Node->getExceptionDecl())
PrintRawDecl(ExDecl);
else
OS << "...";
OS << ") ";
PrintRawCompoundStmt(cast<CompoundStmt>(Node->getHandlerBlock()));
}
void StmtPrinter::VisitCXXCatchStmt(CXXCatchStmt *Node) {
Indent();
PrintRawCXXCatchStmt(Node);
OS << "\n";
}
void StmtPrinter::VisitCXXTryStmt(CXXTryStmt *Node) {
Indent() << "try ";
PrintRawCompoundStmt(Node->getTryBlock());
for (unsigned i = 0, e = Node->getNumHandlers(); i < e; ++i) {
OS << " ";
PrintRawCXXCatchStmt(Node->getHandler(i));
}
OS << "\n";
}
void StmtPrinter::VisitSEHTryStmt(SEHTryStmt *Node) {
Indent() << (Node->getIsCXXTry() ? "try " : "__try ");
PrintRawCompoundStmt(Node->getTryBlock());
SEHExceptStmt *E = Node->getExceptHandler();
SEHFinallyStmt *F = Node->getFinallyHandler();
if (E)
PrintRawSEHExceptHandler(E);
else {
assert(F && "Must have a finally block...");
PrintRawSEHFinallyStmt(F);
}
OS << "\n";
}
void StmtPrinter::PrintRawSEHFinallyStmt(SEHFinallyStmt *Node) {
OS << "__finally ";
PrintRawCompoundStmt(Node->getBlock());
OS << "\n";
}
void StmtPrinter::PrintRawSEHExceptHandler(SEHExceptStmt *Node) {
OS << "__except (";
VisitExpr(Node->getFilterExpr());
OS << ")\n";
PrintRawCompoundStmt(Node->getBlock());
OS << "\n";
}
void StmtPrinter::VisitSEHExceptStmt(SEHExceptStmt *Node) {
Indent();
PrintRawSEHExceptHandler(Node);
OS << "\n";
}
void StmtPrinter::VisitSEHFinallyStmt(SEHFinallyStmt *Node) {
Indent();
PrintRawSEHFinallyStmt(Node);
OS << "\n";
}
//===----------------------------------------------------------------------===//
// OpenMP clauses printing methods
//===----------------------------------------------------------------------===//
namespace {
class OMPClausePrinter : public OMPClauseVisitor<OMPClausePrinter> {
raw_ostream &OS;
/// \brief Process clauses with list of variables.
template <typename T>
void VisitOMPClauseList(T *Node, char StartSym);
public:
OMPClausePrinter(raw_ostream &OS) : OS(OS) { }
#define OPENMP_CLAUSE(Name, Class) \
void Visit##Class(Class *S);
#include "clang/Basic/OpenMPKinds.def"
};
void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) {
OS << "default("
<< getOpenMPSimpleClauseTypeName(OMPC_default, Node->getDefaultKind())
<< ")";
}
template<typename T>
void OMPClausePrinter::VisitOMPClauseList(T *Node, char StartSym) {
for (typename T::varlist_iterator I = Node->varlist_begin(),
E = Node->varlist_end();
I != E; ++I)
OS << (I == Node->varlist_begin() ? StartSym : ',')
<< *cast<NamedDecl>(cast<DeclRefExpr>(*I)->getDecl());
}
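// For example, for the clause "private(a, b)" the caller prints "private",
// this helper emits "(a,b", and the caller closes with ")".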
void OMPClausePrinter::VisitOMPPrivateClause(OMPPrivateClause *Node) {
if (!Node->varlist_empty()) {
OS << "private";
VisitOMPClauseList(Node, '(');
OS << ")";
}
}
void OMPClausePrinter::VisitOMPFirstprivateClause(OMPFirstprivateClause *Node) {
if (!Node->varlist_empty()) {
OS << "firstprivate";
VisitOMPClauseList(Node, '(');
OS << ")";
}
}
void OMPClausePrinter::VisitOMPSharedClause(OMPSharedClause *Node) {
if (!Node->varlist_empty()) {
OS << "shared";
VisitOMPClauseList(Node, '(');
OS << ")";
}
}
}
//===----------------------------------------------------------------------===//
// OpenMP directives printing methods
//===----------------------------------------------------------------------===//
void StmtPrinter::VisitOMPParallelDirective(OMPParallelDirective *Node) {
Indent() << "#pragma omp parallel ";
OMPClausePrinter Printer(OS);
ArrayRef<OMPClause *> Clauses = Node->clauses();
for (ArrayRef<OMPClause *>::iterator I = Clauses.begin(), E = Clauses.end();
I != E; ++I)
if (*I && !(*I)->isImplicit()) {
Printer.Visit(*I);
OS << ' ';
}
OS << "\n";
if (Node->getAssociatedStmt()) {
assert(isa<CapturedStmt>(Node->getAssociatedStmt()) &&
"Expected captured statement!");
Stmt *CS = cast<CapturedStmt>(Node->getAssociatedStmt())->getCapturedStmt();
PrintStmt(CS);
}
}
//===----------------------------------------------------------------------===//
// Expr printing methods.
//===----------------------------------------------------------------------===//
void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) {
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Qualifier->print(OS, Policy);
if (Node->hasTemplateKeyword())
OS << "template ";
OS << Node->getNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
}
void StmtPrinter::VisitDependentScopeDeclRefExpr(
DependentScopeDeclRefExpr *Node) {
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Qualifier->print(OS, Policy);
if (Node->hasTemplateKeyword())
OS << "template ";
OS << Node->getNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
}
void StmtPrinter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *Node) {
if (Node->getQualifier())
Node->getQualifier()->print(OS, Policy);
if (Node->hasTemplateKeyword())
OS << "template ";
OS << Node->getNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
}
void StmtPrinter::VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node) {
if (Node->getBase()) {
PrintExpr(Node->getBase());
OS << (Node->isArrow() ? "->" : ".");
}
OS << *Node->getDecl();
}
void StmtPrinter::VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *Node) {
if (Node->isSuperReceiver())
OS << "super.";
- else if (Node->getBase()) {
+ else if (Node->isObjectReceiver() && Node->getBase()) {
PrintExpr(Node->getBase());
OS << ".";
+ } else if (Node->isClassReceiver() && Node->getClassReceiver()) {
+ OS << Node->getClassReceiver()->getName() << ".";
}
if (Node->isImplicitProperty())
OS << Node->getImplicitPropertyGetter()->getSelector().getAsString();
else
OS << Node->getExplicitProperty()->getName();
}
void StmtPrinter::VisitObjCSubscriptRefExpr(ObjCSubscriptRefExpr *Node) {
PrintExpr(Node->getBaseExpr());
OS << "[";
PrintExpr(Node->getKeyExpr());
OS << "]";
}
void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) {
switch (Node->getIdentType()) {
default:
llvm_unreachable("unknown case");
case PredefinedExpr::Func:
OS << "__func__";
break;
case PredefinedExpr::Function:
OS << "__FUNCTION__";
break;
case PredefinedExpr::FuncDName:
OS << "__FUNCDNAME__";
break;
case PredefinedExpr::LFunction:
OS << "L__FUNCTION__";
break;
case PredefinedExpr::PrettyFunction:
OS << "__PRETTY_FUNCTION__";
break;
}
}
void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
unsigned value = Node->getValue();
switch (Node->getKind()) {
case CharacterLiteral::Ascii: break; // no prefix.
case CharacterLiteral::Wide: OS << 'L'; break;
case CharacterLiteral::UTF16: OS << 'u'; break;
case CharacterLiteral::UTF32: OS << 'U'; break;
}
switch (value) {
case '\\':
OS << "'\\\\'";
break;
case '\'':
OS << "'\\''";
break;
case '\a':
// TODO: K&R: the meaning of '\\a' is different in traditional C
OS << "'\\a'";
break;
case '\b':
OS << "'\\b'";
break;
// Nonstandard escape sequence.
/*case '\e':
OS << "'\\e'";
break;*/
case '\f':
OS << "'\\f'";
break;
case '\n':
OS << "'\\n'";
break;
case '\r':
OS << "'\\r'";
break;
case '\t':
OS << "'\\t'";
break;
case '\v':
OS << "'\\v'";
break;
default:
if (value < 256 && isPrintable((unsigned char)value))
OS << "'" << (char)value << "'";
else if (value < 256)
OS << "'\\x" << llvm::format("%02x", value) << "'";
else if (value <= 0xFFFF)
OS << "'\\u" << llvm::format("%04x", value) << "'";
else
OS << "'\\U" << llvm::format("%08x", value) << "'";
}
}
void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) {
bool isSigned = Node->getType()->isSignedIntegerType();
OS << Node->getValue().toString(10, isSigned);
// Emit suffixes. Integer literals are always a builtin integer type.
switch (Node->getType()->getAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("Unexpected type for integer literal!");
// FIXME: The Short and UShort cases are to handle cases where a short
// integral literal is formed during template instantiation. They should
// be removed when template instantiation no longer needs integer literals.
case BuiltinType::Short:
case BuiltinType::UShort:
case BuiltinType::Int: break; // no suffix.
case BuiltinType::UInt: OS << 'U'; break;
case BuiltinType::Long: OS << 'L'; break;
case BuiltinType::ULong: OS << "UL"; break;
case BuiltinType::LongLong: OS << "LL"; break;
case BuiltinType::ULongLong: OS << "ULL"; break;
case BuiltinType::Int128: OS << "i128"; break;
case BuiltinType::UInt128: OS << "Ui128"; break;
}
}
static void PrintFloatingLiteral(raw_ostream &OS, FloatingLiteral *Node,
bool PrintSuffix) {
SmallString<16> Str;
Node->getValue().toString(Str);
OS << Str;
if (Str.find_first_not_of("-0123456789") == StringRef::npos)
OS << '.'; // Trailing dot in order to separate from ints.
if (!PrintSuffix)
return;
// Emit suffixes. Float literals are always a builtin float type.
switch (Node->getType()->getAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("Unexpected type for float literal!");
case BuiltinType::Half: break; // FIXME: suffix?
case BuiltinType::Double: break; // no suffix.
case BuiltinType::Float: OS << 'F'; break;
case BuiltinType::LongDouble: OS << 'L'; break;
}
}
void StmtPrinter::VisitFloatingLiteral(FloatingLiteral *Node) {
PrintFloatingLiteral(OS, Node, /*PrintSuffix=*/true);
}
void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) {
PrintExpr(Node->getSubExpr());
OS << "i";
}
void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
Str->outputString(OS);
}
void StmtPrinter::VisitParenExpr(ParenExpr *Node) {
OS << "(";
PrintExpr(Node->getSubExpr());
OS << ")";
}
void StmtPrinter::VisitUnaryOperator(UnaryOperator *Node) {
if (!Node->isPostfix()) {
OS << UnaryOperator::getOpcodeStr(Node->getOpcode());
// Print a space if this is an "identifier operator" like __real, or if
// it might be concatenated incorrectly like '+'.
switch (Node->getOpcode()) {
default: break;
case UO_Real:
case UO_Imag:
case UO_Extension:
OS << ' ';
break;
case UO_Plus:
case UO_Minus:
if (isa<UnaryOperator>(Node->getSubExpr()))
OS << ' ';
break;
}
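// For example, without the extra space "-(-x)" would print as "--x",
// which would lex as a predecrement.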
}
PrintExpr(Node->getSubExpr());
if (Node->isPostfix())
OS << UnaryOperator::getOpcodeStr(Node->getOpcode());
}
void StmtPrinter::VisitOffsetOfExpr(OffsetOfExpr *Node) {
OS << "__builtin_offsetof(";
Node->getTypeSourceInfo()->getType().print(OS, Policy);
OS << ", ";
bool PrintedSomething = false;
for (unsigned i = 0, n = Node->getNumComponents(); i < n; ++i) {
OffsetOfExpr::OffsetOfNode ON = Node->getComponent(i);
if (ON.getKind() == OffsetOfExpr::OffsetOfNode::Array) {
// Array node
OS << "[";
PrintExpr(Node->getIndexExpr(ON.getArrayExprIndex()));
OS << "]";
PrintedSomething = true;
continue;
}
// Skip implicit base indirections.
if (ON.getKind() == OffsetOfExpr::OffsetOfNode::Base)
continue;
// Field or identifier node.
IdentifierInfo *Id = ON.getFieldName();
if (!Id)
continue;
if (PrintedSomething)
OS << ".";
else
PrintedSomething = true;
OS << Id->getName();
}
OS << ")";
}
void StmtPrinter::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node){
switch(Node->getKind()) {
case UETT_SizeOf:
OS << "sizeof";
break;
case UETT_AlignOf:
if (Policy.LangOpts.CPlusPlus)
OS << "alignof";
else if (Policy.LangOpts.C11)
OS << "_Alignof";
else
OS << "__alignof";
break;
case UETT_VecStep:
OS << "vec_step";
break;
}
if (Node->isArgumentType()) {
OS << '(';
Node->getArgumentType().print(OS, Policy);
OS << ')';
} else {
OS << " ";
PrintExpr(Node->getArgumentExpr());
}
}
void StmtPrinter::VisitGenericSelectionExpr(GenericSelectionExpr *Node) {
OS << "_Generic(";
PrintExpr(Node->getControllingExpr());
for (unsigned i = 0; i != Node->getNumAssocs(); ++i) {
OS << ", ";
QualType T = Node->getAssocType(i);
if (T.isNull())
OS << "default";
else
T.print(OS, Policy);
OS << ": ";
PrintExpr(Node->getAssocExpr(i));
}
OS << ")";
}
void StmtPrinter::VisitArraySubscriptExpr(ArraySubscriptExpr *Node) {
PrintExpr(Node->getLHS());
OS << "[";
PrintExpr(Node->getRHS());
OS << "]";
}
void StmtPrinter::PrintCallArgs(CallExpr *Call) {
for (unsigned i = 0, e = Call->getNumArgs(); i != e; ++i) {
if (isa<CXXDefaultArgExpr>(Call->getArg(i))) {
// Don't print any defaulted arguments
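// (Default arguments can only trail the explicit ones, so it is safe
// to stop at the first CXXDefaultArgExpr.)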
break;
}
if (i) OS << ", ";
PrintExpr(Call->getArg(i));
}
}
void StmtPrinter::VisitCallExpr(CallExpr *Call) {
PrintExpr(Call->getCallee());
OS << "(";
PrintCallArgs(Call);
OS << ")";
}
void StmtPrinter::VisitMemberExpr(MemberExpr *Node) {
// FIXME: Suppress printing implicit bases (like "this")
PrintExpr(Node->getBase());
MemberExpr *ParentMember = dyn_cast<MemberExpr>(Node->getBase());
FieldDecl *ParentDecl = ParentMember
? dyn_cast<FieldDecl>(ParentMember->getMemberDecl()) : NULL;
if (!ParentDecl || !ParentDecl->isAnonymousStructOrUnion())
OS << (Node->isArrow() ? "->" : ".");
if (FieldDecl *FD = dyn_cast<FieldDecl>(Node->getMemberDecl()))
if (FD->isAnonymousStructOrUnion())
return;
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Qualifier->print(OS, Policy);
if (Node->hasTemplateKeyword())
OS << "template ";
OS << Node->getMemberNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
}
void StmtPrinter::VisitObjCIsaExpr(ObjCIsaExpr *Node) {
PrintExpr(Node->getBase());
OS << (Node->isArrow() ? "->isa" : ".isa");
}
void StmtPrinter::VisitExtVectorElementExpr(ExtVectorElementExpr *Node) {
PrintExpr(Node->getBase());
OS << ".";
OS << Node->getAccessor().getName();
}
void StmtPrinter::VisitCStyleCastExpr(CStyleCastExpr *Node) {
OS << '(';
Node->getTypeAsWritten().print(OS, Policy);
OS << ')';
PrintExpr(Node->getSubExpr());
}
void StmtPrinter::VisitCompoundLiteralExpr(CompoundLiteralExpr *Node) {
OS << '(';
Node->getType().print(OS, Policy);
OS << ')';
PrintExpr(Node->getInitializer());
}
void StmtPrinter::VisitImplicitCastExpr(ImplicitCastExpr *Node) {
// No need to print anything, simply forward to the sub expression.
PrintExpr(Node->getSubExpr());
}
void StmtPrinter::VisitBinaryOperator(BinaryOperator *Node) {
PrintExpr(Node->getLHS());
OS << " " << BinaryOperator::getOpcodeStr(Node->getOpcode()) << " ";
PrintExpr(Node->getRHS());
}
void StmtPrinter::VisitCompoundAssignOperator(CompoundAssignOperator *Node) {
PrintExpr(Node->getLHS());
OS << " " << BinaryOperator::getOpcodeStr(Node->getOpcode()) << " ";
PrintExpr(Node->getRHS());
}
void StmtPrinter::VisitConditionalOperator(ConditionalOperator *Node) {
PrintExpr(Node->getCond());
OS << " ? ";
PrintExpr(Node->getLHS());
OS << " : ";
PrintExpr(Node->getRHS());
}
// GNU extensions.
void
StmtPrinter::VisitBinaryConditionalOperator(BinaryConditionalOperator *Node) {
PrintExpr(Node->getCommon());
OS << " ?: ";
PrintExpr(Node->getFalseExpr());
}
void StmtPrinter::VisitAddrLabelExpr(AddrLabelExpr *Node) {
OS << "&&" << Node->getLabel()->getName();
}
void StmtPrinter::VisitStmtExpr(StmtExpr *E) {
OS << "(";
PrintRawCompoundStmt(E->getSubStmt());
OS << ")";
}
void StmtPrinter::VisitChooseExpr(ChooseExpr *Node) {
OS << "__builtin_choose_expr(";
PrintExpr(Node->getCond());
OS << ", ";
PrintExpr(Node->getLHS());
OS << ", ";
PrintExpr(Node->getRHS());
OS << ")";
}
void StmtPrinter::VisitGNUNullExpr(GNUNullExpr *) {
OS << "__null";
}
void StmtPrinter::VisitShuffleVectorExpr(ShuffleVectorExpr *Node) {
OS << "__builtin_shufflevector(";
for (unsigned i = 0, e = Node->getNumSubExprs(); i != e; ++i) {
if (i) OS << ", ";
PrintExpr(Node->getExpr(i));
}
OS << ")";
}
void StmtPrinter::VisitConvertVectorExpr(ConvertVectorExpr *Node) {
OS << "__builtin_convertvector(";
PrintExpr(Node->getSrcExpr());
OS << ", ";
Node->getType().print(OS, Policy);
OS << ")";
}
void StmtPrinter::VisitInitListExpr(InitListExpr* Node) {
if (Node->getSyntacticForm()) {
Visit(Node->getSyntacticForm());
return;
}
OS << "{ ";
for (unsigned i = 0, e = Node->getNumInits(); i != e; ++i) {
if (i) OS << ", ";
if (Node->getInit(i))
PrintExpr(Node->getInit(i));
else
OS << "0";
}
OS << " }";
}
void StmtPrinter::VisitParenListExpr(ParenListExpr* Node) {
OS << "( ";
for (unsigned i = 0, e = Node->getNumExprs(); i != e; ++i) {
if (i) OS << ", ";
PrintExpr(Node->getExpr(i));
}
OS << " )";
}
void StmtPrinter::VisitDesignatedInitExpr(DesignatedInitExpr *Node) {
for (DesignatedInitExpr::designators_iterator D = Node->designators_begin(),
DEnd = Node->designators_end();
D != DEnd; ++D) {
if (D->isFieldDesignator()) {
if (D->getDotLoc().isInvalid())
OS << D->getFieldName()->getName() << ":";
else
OS << "." << D->getFieldName()->getName();
} else {
OS << "[";
if (D->isArrayDesignator()) {
PrintExpr(Node->getArrayIndex(*D));
} else {
PrintExpr(Node->getArrayRangeStart(*D));
OS << " ... ";
PrintExpr(Node->getArrayRangeEnd(*D));
}
OS << "]";
}
}
OS << " = ";
PrintExpr(Node->getInit());
}
void StmtPrinter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *Node) {
if (Policy.LangOpts.CPlusPlus) {
OS << "/*implicit*/";
Node->getType().print(OS, Policy);
OS << "()";
} else {
OS << "/*implicit*/(";
Node->getType().print(OS, Policy);
OS << ')';
if (Node->getType()->isRecordType())
OS << "{}";
else
OS << 0;
}
}
void StmtPrinter::VisitVAArgExpr(VAArgExpr *Node) {
OS << "__builtin_va_arg(";
PrintExpr(Node->getSubExpr());
OS << ", ";
Node->getType().print(OS, Policy);
OS << ")";
}
void StmtPrinter::VisitPseudoObjectExpr(PseudoObjectExpr *Node) {
PrintExpr(Node->getSyntacticForm());
}
void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
const char *Name = 0;
switch (Node->getOp()) {
#define BUILTIN(ID, TYPE, ATTRS)
#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
case AtomicExpr::AO ## ID: \
Name = #ID "("; \
break;
#include "clang/Basic/Builtins.def"
}
OS << Name;
// AtomicExpr stores its subexpressions in a permuted order.
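// For example, __c11_atomic_store(Ptr, Val, Order) prints Ptr, then
// Val1, then Order, matching the builtin's source-level signature.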
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n) {
OS << ", ";
PrintExpr(Node->getVal1());
}
if (Node->getOp() == AtomicExpr::AO__atomic_exchange ||
Node->isCmpXChg()) {
OS << ", ";
PrintExpr(Node->getVal2());
}
if (Node->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
Node->getOp() == AtomicExpr::AO__atomic_compare_exchange_n) {
OS << ", ";
PrintExpr(Node->getWeak());
}
if (Node->getOp() != AtomicExpr::AO__c11_atomic_init) {
OS << ", ";
PrintExpr(Node->getOrder());
}
if (Node->isCmpXChg()) {
OS << ", ";
PrintExpr(Node->getOrderFail());
}
OS << ")";
}
// C++
void StmtPrinter::VisitCXXOperatorCallExpr(CXXOperatorCallExpr *Node) {
const char *OpStrings[NUM_OVERLOADED_OPERATORS] = {
"",
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
Spelling,
#include "clang/Basic/OperatorKinds.def"
};
OverloadedOperatorKind Kind = Node->getOperator();
if (Kind == OO_PlusPlus || Kind == OO_MinusMinus) {
if (Node->getNumArgs() == 1) {
OS << OpStrings[Kind] << ' ';
PrintExpr(Node->getArg(0));
} else {
PrintExpr(Node->getArg(0));
OS << ' ' << OpStrings[Kind];
}
} else if (Kind == OO_Arrow) {
PrintExpr(Node->getArg(0));
} else if (Kind == OO_Call) {
PrintExpr(Node->getArg(0));
OS << '(';
for (unsigned ArgIdx = 1; ArgIdx < Node->getNumArgs(); ++ArgIdx) {
if (ArgIdx > 1)
OS << ", ";
if (!isa<CXXDefaultArgExpr>(Node->getArg(ArgIdx)))
PrintExpr(Node->getArg(ArgIdx));
}
OS << ')';
} else if (Kind == OO_Subscript) {
PrintExpr(Node->getArg(0));
OS << '[';
PrintExpr(Node->getArg(1));
OS << ']';
} else if (Node->getNumArgs() == 1) {
OS << OpStrings[Kind] << ' ';
PrintExpr(Node->getArg(0));
} else if (Node->getNumArgs() == 2) {
PrintExpr(Node->getArg(0));
OS << ' ' << OpStrings[Kind] << ' ';
PrintExpr(Node->getArg(1));
} else {
llvm_unreachable("unknown overloaded operator");
}
}
void StmtPrinter::VisitCXXMemberCallExpr(CXXMemberCallExpr *Node) {
VisitCallExpr(cast<CallExpr>(Node));
}
void StmtPrinter::VisitCUDAKernelCallExpr(CUDAKernelCallExpr *Node) {
PrintExpr(Node->getCallee());
OS << "<<<";
PrintCallArgs(Node->getConfig());
OS << ">>>(";
PrintCallArgs(Node);
OS << ")";
}
void StmtPrinter::VisitCXXNamedCastExpr(CXXNamedCastExpr *Node) {
OS << Node->getCastName() << '<';
Node->getTypeAsWritten().print(OS, Policy);
OS << ">(";
PrintExpr(Node->getSubExpr());
OS << ")";
}
void StmtPrinter::VisitCXXStaticCastExpr(CXXStaticCastExpr *Node) {
VisitCXXNamedCastExpr(Node);
}
void StmtPrinter::VisitCXXDynamicCastExpr(CXXDynamicCastExpr *Node) {
VisitCXXNamedCastExpr(Node);
}
void StmtPrinter::VisitCXXReinterpretCastExpr(CXXReinterpretCastExpr *Node) {
VisitCXXNamedCastExpr(Node);
}
void StmtPrinter::VisitCXXConstCastExpr(CXXConstCastExpr *Node) {
VisitCXXNamedCastExpr(Node);
}
void StmtPrinter::VisitCXXTypeidExpr(CXXTypeidExpr *Node) {
OS << "typeid(";
if (Node->isTypeOperand()) {
Node->getTypeOperandSourceInfo()->getType().print(OS, Policy);
} else {
PrintExpr(Node->getExprOperand());
}
OS << ")";
}
void StmtPrinter::VisitCXXUuidofExpr(CXXUuidofExpr *Node) {
OS << "__uuidof(";
if (Node->isTypeOperand()) {
Node->getTypeOperandSourceInfo()->getType().print(OS, Policy);
} else {
PrintExpr(Node->getExprOperand());
}
OS << ")";
}
void StmtPrinter::VisitMSPropertyRefExpr(MSPropertyRefExpr *Node) {
PrintExpr(Node->getBaseExpr());
if (Node->isArrow())
OS << "->";
else
OS << ".";
if (NestedNameSpecifier *Qualifier =
Node->getQualifierLoc().getNestedNameSpecifier())
Qualifier->print(OS, Policy);
OS << Node->getPropertyDecl()->getDeclName();
}
void StmtPrinter::VisitUserDefinedLiteral(UserDefinedLiteral *Node) {
switch (Node->getLiteralOperatorKind()) {
case UserDefinedLiteral::LOK_Raw:
OS << cast<StringLiteral>(Node->getArg(0)->IgnoreImpCasts())->getString();
break;
case UserDefinedLiteral::LOK_Template: {
DeclRefExpr *DRE = cast<DeclRefExpr>(Node->getCallee()->IgnoreImpCasts());
const TemplateArgumentList *Args =
cast<FunctionDecl>(DRE->getDecl())->getTemplateSpecializationArgs();
assert(Args);
const TemplateArgument &Pack = Args->get(0);
for (TemplateArgument::pack_iterator I = Pack.pack_begin(),
E = Pack.pack_end(); I != E; ++I) {
char C = (char)I->getAsIntegral().getZExtValue();
OS << C;
}
break;
}
case UserDefinedLiteral::LOK_Integer: {
// Print integer literal without suffix.
IntegerLiteral *Int = cast<IntegerLiteral>(Node->getCookedLiteral());
OS << Int->getValue().toString(10, /*isSigned*/false);
break;
}
case UserDefinedLiteral::LOK_Floating: {
// Print floating literal without suffix.
FloatingLiteral *Float = cast<FloatingLiteral>(Node->getCookedLiteral());
PrintFloatingLiteral(OS, Float, /*PrintSuffix=*/false);
break;
}
case UserDefinedLiteral::LOK_String:
case UserDefinedLiteral::LOK_Character:
PrintExpr(Node->getCookedLiteral());
break;
}
OS << Node->getUDSuffix()->getName();
}
void StmtPrinter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *Node) {
OS << (Node->getValue() ? "true" : "false");
}
void StmtPrinter::VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *Node) {
OS << "nullptr";
}
void StmtPrinter::VisitCXXThisExpr(CXXThisExpr *Node) {
OS << "this";
}
void StmtPrinter::VisitCXXThrowExpr(CXXThrowExpr *Node) {
if (Node->getSubExpr() == 0)
OS << "throw";
else {
OS << "throw ";
PrintExpr(Node->getSubExpr());
}
}
void StmtPrinter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Node) {
// Nothing to print: we picked up the default argument.
}
void StmtPrinter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Node) {
// Nothing to print: we picked up the default initializer.
}
void StmtPrinter::VisitCXXFunctionalCastExpr(CXXFunctionalCastExpr *Node) {
Node->getType().print(OS, Policy);
OS << "(";
PrintExpr(Node->getSubExpr());
OS << ")";
}
void StmtPrinter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *Node) {
PrintExpr(Node->getSubExpr());
}
void StmtPrinter::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *Node) {
Node->getType().print(OS, Policy);
OS << "(";
for (CXXTemporaryObjectExpr::arg_iterator Arg = Node->arg_begin(),
ArgEnd = Node->arg_end();
Arg != ArgEnd; ++Arg) {
if (Arg->isDefaultArgument())
break;
if (Arg != Node->arg_begin())
OS << ", ";
PrintExpr(*Arg);
}
OS << ")";
}
void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) {
OS << '[';
bool NeedComma = false;
switch (Node->getCaptureDefault()) {
case LCD_None:
break;
case LCD_ByCopy:
OS << '=';
NeedComma = true;
break;
case LCD_ByRef:
OS << '&';
NeedComma = true;
break;
}
for (LambdaExpr::capture_iterator C = Node->explicit_capture_begin(),
CEnd = Node->explicit_capture_end();
C != CEnd;
++C) {
if (NeedComma)
OS << ", ";
NeedComma = true;
switch (C->getCaptureKind()) {
case LCK_This:
OS << "this";
break;
case LCK_ByRef:
if (Node->getCaptureDefault() != LCD_ByRef || C->isInitCapture())
OS << '&';
OS << C->getCapturedVar()->getName();
break;
case LCK_ByCopy:
OS << C->getCapturedVar()->getName();
break;
}
if (C->isInitCapture())
PrintExpr(C->getCapturedVar()->getInit());
}
OS << ']';
if (Node->hasExplicitParameters()) {
OS << " (";
CXXMethodDecl *Method = Node->getCallOperator();
NeedComma = false;
for (CXXMethodDecl::param_iterator P = Method->param_begin(),
PEnd = Method->param_end();
P != PEnd; ++P) {
if (NeedComma) {
OS << ", ";
} else {
NeedComma = true;
}
std::string ParamStr = (*P)->getNameAsString();
(*P)->getOriginalType().print(OS, Policy, ParamStr);
}
if (Method->isVariadic()) {
if (NeedComma)
OS << ", ";
OS << "...";
}
OS << ')';
if (Node->isMutable())
OS << " mutable";
const FunctionProtoType *Proto
= Method->getType()->getAs<FunctionProtoType>();
Proto->printExceptionSpecification(OS, Policy);
// FIXME: Attributes
// Print the trailing return type if it was specified in the source.
if (Node->hasExplicitResultType()) {
OS << " -> ";
Proto->getResultType().print(OS, Policy);
}
}
// Print the body.
CompoundStmt *Body = Node->getBody();
OS << ' ';
PrintStmt(Body);
}
void StmtPrinter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *Node) {
if (TypeSourceInfo *TSInfo = Node->getTypeSourceInfo())
TSInfo->getType().print(OS, Policy);
else
Node->getType().print(OS, Policy);
OS << "()";
}
void StmtPrinter::VisitCXXNewExpr(CXXNewExpr *E) {
if (E->isGlobalNew())
OS << "::";
OS << "new ";
unsigned NumPlace = E->getNumPlacementArgs();
if (NumPlace > 0 && !isa<CXXDefaultArgExpr>(E->getPlacementArg(0))) {
OS << "(";
PrintExpr(E->getPlacementArg(0));
for (unsigned i = 1; i < NumPlace; ++i) {
if (isa<CXXDefaultArgExpr>(E->getPlacementArg(i)))
break;
OS << ", ";
PrintExpr(E->getPlacementArg(i));
}
OS << ") ";
}
if (E->isParenTypeId())
OS << "(";
std::string TypeS;
if (Expr *Size = E->getArraySize()) {
llvm::raw_string_ostream s(TypeS);
s << '[';
Size->printPretty(s, Helper, Policy);
s << ']';
}
E->getAllocatedType().print(OS, Policy, TypeS);
if (E->isParenTypeId())
OS << ")";
CXXNewExpr::InitializationStyle InitStyle = E->getInitializationStyle();
if (InitStyle) {
if (InitStyle == CXXNewExpr::CallInit)
OS << "(";
PrintExpr(E->getInitializer());
if (InitStyle == CXXNewExpr::CallInit)
OS << ")";
}
}
void StmtPrinter::VisitCXXDeleteExpr(CXXDeleteExpr *E) {
if (E->isGlobalDelete())
OS << "::";
OS << "delete ";
if (E->isArrayForm())
OS << "[] ";
PrintExpr(E->getArgument());
}
void StmtPrinter::VisitCXXPseudoDestructorExpr(CXXPseudoDestructorExpr *E) {
PrintExpr(E->getBase());
if (E->isArrow())
OS << "->";
else
OS << '.';
if (E->getQualifier())
E->getQualifier()->print(OS, Policy);
OS << "~";
if (IdentifierInfo *II = E->getDestroyedTypeIdentifier())
OS << II->getName();
else
E->getDestroyedType().print(OS, Policy);
}
void StmtPrinter::VisitCXXConstructExpr(CXXConstructExpr *E) {
if (E->isListInitialization())
OS << "{ ";
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
if (isa<CXXDefaultArgExpr>(E->getArg(i))) {
// Don't print any defaulted arguments
break;
}
if (i) OS << ", ";
PrintExpr(E->getArg(i));
}
if (E->isListInitialization())
OS << " }";
}
void StmtPrinter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
PrintExpr(E->getSubExpr());
}
void StmtPrinter::VisitExprWithCleanups(ExprWithCleanups *E) {
// Just forward to the sub expression.
PrintExpr(E->getSubExpr());
}
void
StmtPrinter::VisitCXXUnresolvedConstructExpr(
CXXUnresolvedConstructExpr *Node) {
Node->getTypeAsWritten().print(OS, Policy);
OS << "(";
for (CXXUnresolvedConstructExpr::arg_iterator Arg = Node->arg_begin(),
ArgEnd = Node->arg_end();
Arg != ArgEnd; ++Arg) {
if (Arg != Node->arg_begin())
OS << ", ";
PrintExpr(*Arg);
}
OS << ")";
}
void StmtPrinter::VisitCXXDependentScopeMemberExpr(
CXXDependentScopeMemberExpr *Node) {
if (!Node->isImplicitAccess()) {
PrintExpr(Node->getBase());
OS << (Node->isArrow() ? "->" : ".");
}
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Qualifier->print(OS, Policy);
if (Node->hasTemplateKeyword())
OS << "template ";
OS << Node->getMemberNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
}
void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) {
if (!Node->isImplicitAccess()) {
PrintExpr(Node->getBase());
OS << (Node->isArrow() ? "->" : ".");
}
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Qualifier->print(OS, Policy);
if (Node->hasTemplateKeyword())
OS << "template ";
OS << Node->getMemberNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
}
static const char *getTypeTraitName(UnaryTypeTrait UTT) {
switch (UTT) {
case UTT_HasNothrowAssign: return "__has_nothrow_assign";
case UTT_HasNothrowMoveAssign: return "__has_nothrow_move_assign";
case UTT_HasNothrowConstructor: return "__has_nothrow_constructor";
case UTT_HasNothrowCopy: return "__has_nothrow_copy";
case UTT_HasTrivialAssign: return "__has_trivial_assign";
case UTT_HasTrivialMoveAssign: return "__has_trivial_move_assign";
case UTT_HasTrivialMoveConstructor: return "__has_trivial_move_constructor";
case UTT_HasTrivialDefaultConstructor: return "__has_trivial_constructor";
case UTT_HasTrivialCopy: return "__has_trivial_copy";
case UTT_HasTrivialDestructor: return "__has_trivial_destructor";
case UTT_HasVirtualDestructor: return "__has_virtual_destructor";
case UTT_IsAbstract: return "__is_abstract";
case UTT_IsArithmetic: return "__is_arithmetic";
case UTT_IsArray: return "__is_array";
case UTT_IsClass: return "__is_class";
case UTT_IsCompleteType: return "__is_complete_type";
case UTT_IsCompound: return "__is_compound";
case UTT_IsConst: return "__is_const";
case UTT_IsEmpty: return "__is_empty";
case UTT_IsEnum: return "__is_enum";
case UTT_IsFinal: return "__is_final";
case UTT_IsFloatingPoint: return "__is_floating_point";
case UTT_IsFunction: return "__is_function";
case UTT_IsFundamental: return "__is_fundamental";
case UTT_IsIntegral: return "__is_integral";
case UTT_IsInterfaceClass: return "__is_interface_class";
case UTT_IsLiteral: return "__is_literal";
case UTT_IsLvalueReference: return "__is_lvalue_reference";
case UTT_IsMemberFunctionPointer: return "__is_member_function_pointer";
case UTT_IsMemberObjectPointer: return "__is_member_object_pointer";
case UTT_IsMemberPointer: return "__is_member_pointer";
case UTT_IsObject: return "__is_object";
case UTT_IsPOD: return "__is_pod";
case UTT_IsPointer: return "__is_pointer";
case UTT_IsPolymorphic: return "__is_polymorphic";
case UTT_IsReference: return "__is_reference";
case UTT_IsRvalueReference: return "__is_rvalue_reference";
case UTT_IsScalar: return "__is_scalar";
case UTT_IsSealed: return "__is_sealed";
case UTT_IsSigned: return "__is_signed";
case UTT_IsStandardLayout: return "__is_standard_layout";
case UTT_IsTrivial: return "__is_trivial";
case UTT_IsTriviallyCopyable: return "__is_trivially_copyable";
case UTT_IsUnion: return "__is_union";
case UTT_IsUnsigned: return "__is_unsigned";
case UTT_IsVoid: return "__is_void";
case UTT_IsVolatile: return "__is_volatile";
}
llvm_unreachable("Type trait not covered by switch statement");
}
static const char *getTypeTraitName(BinaryTypeTrait BTT) {
switch (BTT) {
case BTT_IsBaseOf: return "__is_base_of";
case BTT_IsConvertible: return "__is_convertible";
case BTT_IsSame: return "__is_same";
case BTT_TypeCompatible: return "__builtin_types_compatible_p";
case BTT_IsConvertibleTo: return "__is_convertible_to";
case BTT_IsTriviallyAssignable: return "__is_trivially_assignable";
}
llvm_unreachable("Binary type trait not covered by switch");
}
static const char *getTypeTraitName(TypeTrait TT) {
switch (TT) {
case clang::TT_IsTriviallyConstructible: return "__is_trivially_constructible";
}
llvm_unreachable("Type trait not covered by switch");
}
static const char *getTypeTraitName(ArrayTypeTrait ATT) {
switch (ATT) {
case ATT_ArrayRank: return "__array_rank";
case ATT_ArrayExtent: return "__array_extent";
}
llvm_unreachable("Array type trait not covered by switch");
}
static const char *getExpressionTraitName(ExpressionTrait ET) {
switch (ET) {
case ET_IsLValueExpr: return "__is_lvalue_expr";
case ET_IsRValueExpr: return "__is_rvalue_expr";
}
llvm_unreachable("Expression type trait not covered by switch");
}
void StmtPrinter::VisitUnaryTypeTraitExpr(UnaryTypeTraitExpr *E) {
OS << getTypeTraitName(E->getTrait()) << '(';
E->getQueriedType().print(OS, Policy);
OS << ')';
}
void StmtPrinter::VisitBinaryTypeTraitExpr(BinaryTypeTraitExpr *E) {
OS << getTypeTraitName(E->getTrait()) << '(';
E->getLhsType().print(OS, Policy);
OS << ',';
E->getRhsType().print(OS, Policy);
OS << ')';
}
void StmtPrinter::VisitTypeTraitExpr(TypeTraitExpr *E) {
OS << getTypeTraitName(E->getTrait()) << "(";
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I) {
if (I > 0)
OS << ", ";
E->getArg(I)->getType().print(OS, Policy);
}
OS << ")";
}
void StmtPrinter::VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
OS << getTypeTraitName(E->getTrait()) << '(';
E->getQueriedType().print(OS, Policy);
OS << ')';
}
void StmtPrinter::VisitExpressionTraitExpr(ExpressionTraitExpr *E) {
OS << getExpressionTraitName(E->getTrait()) << '(';
PrintExpr(E->getQueriedExpression());
OS << ')';
}
void StmtPrinter::VisitCXXNoexceptExpr(CXXNoexceptExpr *E) {
OS << "noexcept(";
PrintExpr(E->getOperand());
OS << ")";
}
void StmtPrinter::VisitPackExpansionExpr(PackExpansionExpr *E) {
PrintExpr(E->getPattern());
OS << "...";
}
void StmtPrinter::VisitSizeOfPackExpr(SizeOfPackExpr *E) {
OS << "sizeof...(" << *E->getPack() << ")";
}
void StmtPrinter::VisitSubstNonTypeTemplateParmPackExpr(
SubstNonTypeTemplateParmPackExpr *Node) {
OS << *Node->getParameterPack();
}
void StmtPrinter::VisitSubstNonTypeTemplateParmExpr(
SubstNonTypeTemplateParmExpr *Node) {
Visit(Node->getReplacement());
}
void StmtPrinter::VisitFunctionParmPackExpr(FunctionParmPackExpr *E) {
OS << *E->getParameterPack();
}
void StmtPrinter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *Node){
PrintExpr(Node->GetTemporaryExpr());
}
// Obj-C
void StmtPrinter::VisitObjCStringLiteral(ObjCStringLiteral *Node) {
OS << "@";
VisitStringLiteral(Node->getString());
}
void StmtPrinter::VisitObjCBoxedExpr(ObjCBoxedExpr *E) {
OS << "@";
Visit(E->getSubExpr());
}
void StmtPrinter::VisitObjCArrayLiteral(ObjCArrayLiteral *E) {
OS << "@[ ";
StmtRange ch = E->children();
if (ch.first != ch.second) {
while (1) {
Visit(*ch.first);
++ch.first;
if (ch.first == ch.second) break;
OS << ", ";
}
}
OS << " ]";
}
void StmtPrinter::VisitObjCDictionaryLiteral(ObjCDictionaryLiteral *E) {
OS << "@{ ";
for (unsigned I = 0, N = E->getNumElements(); I != N; ++I) {
if (I > 0)
OS << ", ";
ObjCDictionaryElement Element = E->getKeyValueElement(I);
Visit(Element.Key);
OS << " : ";
Visit(Element.Value);
if (Element.isPackExpansion())
OS << "...";
}
OS << " }";
}
void StmtPrinter::VisitObjCEncodeExpr(ObjCEncodeExpr *Node) {
OS << "@encode(";
Node->getEncodedType().print(OS, Policy);
OS << ')';
}
void StmtPrinter::VisitObjCSelectorExpr(ObjCSelectorExpr *Node) {
OS << "@selector(" << Node->getSelector().getAsString() << ')';
}
void StmtPrinter::VisitObjCProtocolExpr(ObjCProtocolExpr *Node) {
OS << "@protocol(" << *Node->getProtocol() << ')';
}
void StmtPrinter::VisitObjCMessageExpr(ObjCMessageExpr *Mess) {
OS << "[";
switch (Mess->getReceiverKind()) {
case ObjCMessageExpr::Instance:
PrintExpr(Mess->getInstanceReceiver());
break;
case ObjCMessageExpr::Class:
Mess->getClassReceiver().print(OS, Policy);
break;
case ObjCMessageExpr::SuperInstance:
case ObjCMessageExpr::SuperClass:
OS << "Super";
break;
}
OS << ' ';
Selector selector = Mess->getSelector();
if (selector.isUnarySelector()) {
OS << selector.getNameForSlot(0);
} else {
for (unsigned i = 0, e = Mess->getNumArgs(); i != e; ++i) {
if (i < selector.getNumArgs()) {
if (i > 0) OS << ' ';
if (selector.getIdentifierInfoForSlot(i))
OS << selector.getIdentifierInfoForSlot(i)->getName() << ':';
else
OS << ":";
}
else OS << ", "; // Handle variadic methods.
PrintExpr(Mess->getArg(i));
}
}
OS << "]";
}
void StmtPrinter::VisitObjCBoolLiteralExpr(ObjCBoolLiteralExpr *Node) {
OS << (Node->getValue() ? "__objc_yes" : "__objc_no");
}
void
StmtPrinter::VisitObjCIndirectCopyRestoreExpr(ObjCIndirectCopyRestoreExpr *E) {
PrintExpr(E->getSubExpr());
}
void
StmtPrinter::VisitObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
OS << '(' << E->getBridgeKindName();
E->getType().print(OS, Policy);
OS << ')';
PrintExpr(E->getSubExpr());
}
void StmtPrinter::VisitBlockExpr(BlockExpr *Node) {
BlockDecl *BD = Node->getBlockDecl();
OS << "^";
const FunctionType *AFT = Node->getFunctionType();
if (isa<FunctionNoProtoType>(AFT)) {
OS << "()";
} else if (!BD->param_empty() || cast<FunctionProtoType>(AFT)->isVariadic()) {
OS << '(';
for (BlockDecl::param_iterator AI = BD->param_begin(),
E = BD->param_end(); AI != E; ++AI) {
if (AI != BD->param_begin()) OS << ", ";
std::string ParamStr = (*AI)->getNameAsString();
(*AI)->getType().print(OS, Policy, ParamStr);
}
const FunctionProtoType *FT = cast<FunctionProtoType>(AFT);
if (FT->isVariadic()) {
if (!BD->param_empty()) OS << ", ";
OS << "...";
}
OS << ')';
}
OS << "{ }";
}
void StmtPrinter::VisitOpaqueValueExpr(OpaqueValueExpr *Node) {
PrintExpr(Node->getSourceExpr());
}
void StmtPrinter::VisitAsTypeExpr(AsTypeExpr *Node) {
OS << "__builtin_astype(";
PrintExpr(Node->getSrcExpr());
OS << ", ";
Node->getType().print(OS, Policy);
OS << ")";
}
//===----------------------------------------------------------------------===//
// Stmt method implementations
//===----------------------------------------------------------------------===//
void Stmt::dumpPretty(const ASTContext &Context) const {
printPretty(llvm::errs(), 0, PrintingPolicy(Context.getLangOpts()));
}
void Stmt::printPretty(raw_ostream &OS,
PrinterHelper *Helper,
const PrintingPolicy &Policy,
unsigned Indentation) const {
if (this == 0) {
OS << "<NULL>";
return;
}
StmtPrinter P(OS, Helper, Policy, Indentation);
P.Visit(const_cast<Stmt*>(this));
}
//===----------------------------------------------------------------------===//
// PrinterHelper
//===----------------------------------------------------------------------===//
// Implement virtual destructor.
PrinterHelper::~PrinterHelper() {}
Index: head/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp (revision 265925)
@@ -1,1521 +1,1533 @@
//===- Consumed.cpp --------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// An intra-procedural analysis for checking consumed properties. This is based,
// in part, on research on linear types.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/AnalysisContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/Analyses/Consumed.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
// TODO: Adjust states of args to constructors in the same way that arguments to
// function calls are handled.
// TODO: Use information from tests in for- and while-loop conditionals.
// TODO: Add notes about the actual and expected state for
// TODO: Correctly identify unreachable blocks when chaining boolean operators.
// TODO: Adjust the parser and AttributesList class to support lists of
// identifiers.
// TODO: Warn about unreachable code.
// TODO: Switch to using a bitmap to track unreachable blocks.
// TODO: Handle variable definitions, e.g. bool valid = x.isValid();
// if (valid) ...; (Deferred)
// TODO: Take notes on state transitions to provide better warning messages.
// (Deferred)
// TODO: Test nested conditionals: A) Checking the same value multiple times,
// and B) Checking different values. (Deferred)
using namespace clang;
using namespace consumed;
// Key method definition
ConsumedWarningsHandlerBase::~ConsumedWarningsHandlerBase() {}
static SourceLocation getFirstStmtLoc(const CFGBlock *Block) {
// Find the source location of the first statement in the block, if the block
// is not empty.
for (CFGBlock::const_iterator BI = Block->begin(), BE = Block->end();
BI != BE; ++BI) {
if (Optional<CFGStmt> CS = BI->getAs<CFGStmt>())
return CS->getStmt()->getLocStart();
}
// Block is empty.
// If we have one successor, return the first statement in that block
if (Block->succ_size() == 1 && *Block->succ_begin())
return getFirstStmtLoc(*Block->succ_begin());
return SourceLocation();
}
static SourceLocation getLastStmtLoc(const CFGBlock *Block) {
// Find the source location of the last statement in the block, if the block
// is not empty.
if (const Stmt *StmtNode = Block->getTerminator()) {
return StmtNode->getLocStart();
} else {
for (CFGBlock::const_reverse_iterator BI = Block->rbegin(),
BE = Block->rend(); BI != BE; ++BI) {
if (Optional<CFGStmt> CS = BI->getAs<CFGStmt>())
return CS->getStmt()->getLocStart();
}
}
// If we have one successor, return the first statement in that block
SourceLocation Loc;
if (Block->succ_size() == 1 && *Block->succ_begin())
Loc = getFirstStmtLoc(*Block->succ_begin());
if (Loc.isValid())
return Loc;
// If we have one predecessor, return the last statement in that block
if (Block->pred_size() == 1 && *Block->pred_begin())
return getLastStmtLoc(*Block->pred_begin());
return Loc;
}
static ConsumedState invertConsumedUnconsumed(ConsumedState State) {
switch (State) {
case CS_Unconsumed:
return CS_Consumed;
case CS_Consumed:
return CS_Unconsumed;
case CS_None:
return CS_None;
case CS_Unknown:
return CS_Unknown;
}
llvm_unreachable("invalid enum");
}
static bool isCallableInState(const CallableWhenAttr *CWAttr,
ConsumedState State) {
CallableWhenAttr::callableState_iterator I = CWAttr->callableState_begin(),
E = CWAttr->callableState_end();
for (; I != E; ++I) {
ConsumedState MappedAttrState = CS_None;
switch (*I) {
case CallableWhenAttr::Unknown:
MappedAttrState = CS_Unknown;
break;
case CallableWhenAttr::Unconsumed:
MappedAttrState = CS_Unconsumed;
break;
case CallableWhenAttr::Consumed:
MappedAttrState = CS_Consumed;
break;
}
if (MappedAttrState == State)
return true;
}
return false;
}
static bool isConsumableType(const QualType &QT) {
if (QT->isPointerType() || QT->isReferenceType())
return false;
if (const CXXRecordDecl *RD = QT->getAsCXXRecordDecl())
return RD->hasAttr<ConsumableAttr>();
return false;
}
static bool isKnownState(ConsumedState State) {
switch (State) {
case CS_Unconsumed:
case CS_Consumed:
return true;
case CS_None:
case CS_Unknown:
return false;
}
llvm_unreachable("invalid enum");
}
static bool isRValueRefish(QualType ParamType) {
return ParamType->isRValueReferenceType() ||
(ParamType->isLValueReferenceType() &&
!cast<LValueReferenceType>(
ParamType.getCanonicalType())->isSpelledAsLValue());
}
static bool isTestingFunction(const FunctionDecl *FunDecl) {
return FunDecl->hasAttr<TestTypestateAttr>();
}
static bool isValueType(QualType ParamType) {
return !(ParamType->isPointerType() || ParamType->isReferenceType());
}
static ConsumedState mapConsumableAttrState(const QualType QT) {
assert(isConsumableType(QT));
const ConsumableAttr *CAttr =
QT->getAsCXXRecordDecl()->getAttr<ConsumableAttr>();
switch (CAttr->getDefaultState()) {
case ConsumableAttr::Unknown:
return CS_Unknown;
case ConsumableAttr::Unconsumed:
return CS_Unconsumed;
case ConsumableAttr::Consumed:
return CS_Consumed;
}
llvm_unreachable("invalid enum");
}
static ConsumedState
mapParamTypestateAttrState(const ParamTypestateAttr *PTAttr) {
switch (PTAttr->getParamState()) {
case ParamTypestateAttr::Unknown:
return CS_Unknown;
case ParamTypestateAttr::Unconsumed:
return CS_Unconsumed;
case ParamTypestateAttr::Consumed:
return CS_Consumed;
}
llvm_unreachable("invalid_enum");
}
static ConsumedState
mapReturnTypestateAttrState(const ReturnTypestateAttr *RTSAttr) {
switch (RTSAttr->getState()) {
case ReturnTypestateAttr::Unknown:
return CS_Unknown;
case ReturnTypestateAttr::Unconsumed:
return CS_Unconsumed;
case ReturnTypestateAttr::Consumed:
return CS_Consumed;
}
llvm_unreachable("invalid enum");
}
static ConsumedState mapSetTypestateAttrState(const SetTypestateAttr *STAttr) {
switch (STAttr->getNewState()) {
case SetTypestateAttr::Unknown:
return CS_Unknown;
case SetTypestateAttr::Unconsumed:
return CS_Unconsumed;
case SetTypestateAttr::Consumed:
return CS_Consumed;
}
llvm_unreachable("invalid_enum");
}
static StringRef stateToString(ConsumedState State) {
switch (State) {
case consumed::CS_None:
return "none";
case consumed::CS_Unknown:
return "unknown";
case consumed::CS_Unconsumed:
return "unconsumed";
case consumed::CS_Consumed:
return "consumed";
}
llvm_unreachable("invalid enum");
}
static ConsumedState testsFor(const FunctionDecl *FunDecl) {
assert(isTestingFunction(FunDecl));
switch (FunDecl->getAttr<TestTypestateAttr>()->getTestState()) {
case TestTypestateAttr::Unconsumed:
return CS_Unconsumed;
case TestTypestateAttr::Consumed:
return CS_Consumed;
}
llvm_unreachable("invalid enum");
}
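For context, a hedged sketch of the annotations that the mapping functions above translate into ConsumedState values (a hypothetical test type; the attribute spellings follow the consumed-analysis attributes referenced in this file and may differ across clang releases):

  class __attribute__((consumable(unconsumed))) File {
  public:
    // Constructed objects start in the unconsumed state.
    File() __attribute__((return_typestate(unconsumed)));
    // Testing function: true while the object is unconsumed.
    bool isOpen() const __attribute__((test_typestate(unconsumed)));
    // Transitions the object into the consumed state.
    void close() __attribute__((set_typestate(consumed)));
    // Only legal to call while unconsumed.
    void write(const char *Buf) __attribute__((callable_when("unconsumed")));
  };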
namespace {
struct VarTestResult {
const VarDecl *Var;
ConsumedState TestsFor;
};
} // end anonymous namespace
namespace clang {
namespace consumed {
enum EffectiveOp {
EO_And,
EO_Or
};
class PropagationInfo {
enum {
IT_None,
IT_State,
IT_VarTest,
IT_BinTest,
IT_Var,
IT_Tmp
} InfoType;
struct BinTestTy {
const BinaryOperator *Source;
EffectiveOp EOp;
VarTestResult LTest;
VarTestResult RTest;
};
union {
ConsumedState State;
VarTestResult VarTest;
const VarDecl *Var;
const CXXBindTemporaryExpr *Tmp;
BinTestTy BinTest;
};
public:
PropagationInfo() : InfoType(IT_None) {}
PropagationInfo(const VarTestResult &VarTest)
: InfoType(IT_VarTest), VarTest(VarTest) {}
PropagationInfo(const VarDecl *Var, ConsumedState TestsFor)
: InfoType(IT_VarTest) {
VarTest.Var = Var;
VarTest.TestsFor = TestsFor;
}
PropagationInfo(const BinaryOperator *Source, EffectiveOp EOp,
const VarTestResult &LTest, const VarTestResult &RTest)
: InfoType(IT_BinTest) {
BinTest.Source = Source;
BinTest.EOp = EOp;
BinTest.LTest = LTest;
BinTest.RTest = RTest;
}
PropagationInfo(const BinaryOperator *Source, EffectiveOp EOp,
const VarDecl *LVar, ConsumedState LTestsFor,
const VarDecl *RVar, ConsumedState RTestsFor)
: InfoType(IT_BinTest) {
BinTest.Source = Source;
BinTest.EOp = EOp;
BinTest.LTest.Var = LVar;
BinTest.LTest.TestsFor = LTestsFor;
BinTest.RTest.Var = RVar;
BinTest.RTest.TestsFor = RTestsFor;
}
PropagationInfo(ConsumedState State)
: InfoType(IT_State), State(State) {}
PropagationInfo(const VarDecl *Var) : InfoType(IT_Var), Var(Var) {}
PropagationInfo(const CXXBindTemporaryExpr *Tmp)
: InfoType(IT_Tmp), Tmp(Tmp) {}
const ConsumedState & getState() const {
assert(InfoType == IT_State);
return State;
}
const VarTestResult & getVarTest() const {
assert(InfoType == IT_VarTest);
return VarTest;
}
const VarTestResult & getLTest() const {
assert(InfoType == IT_BinTest);
return BinTest.LTest;
}
const VarTestResult & getRTest() const {
assert(InfoType == IT_BinTest);
return BinTest.RTest;
}
const VarDecl * getVar() const {
assert(InfoType == IT_Var);
return Var;
}
const CXXBindTemporaryExpr * getTmp() const {
assert(InfoType == IT_Tmp);
return Tmp;
}
ConsumedState getAsState(const ConsumedStateMap *StateMap) const {
assert(isVar() || isTmp() || isState());
if (isVar())
return StateMap->getState(Var);
else if (isTmp())
return StateMap->getState(Tmp);
else if (isState())
return State;
else
return CS_None;
}
EffectiveOp testEffectiveOp() const {
assert(InfoType == IT_BinTest);
return BinTest.EOp;
}
const BinaryOperator * testSourceNode() const {
assert(InfoType == IT_BinTest);
return BinTest.Source;
}
inline bool isValid() const { return InfoType != IT_None; }
inline bool isState() const { return InfoType == IT_State; }
inline bool isVarTest() const { return InfoType == IT_VarTest; }
inline bool isBinTest() const { return InfoType == IT_BinTest; }
inline bool isVar() const { return InfoType == IT_Var; }
inline bool isTmp() const { return InfoType == IT_Tmp; }
bool isTest() const {
return InfoType == IT_VarTest || InfoType == IT_BinTest;
}
bool isPointerToValue() const {
return InfoType == IT_Var || InfoType == IT_Tmp;
}
PropagationInfo invertTest() const {
assert(InfoType == IT_VarTest || InfoType == IT_BinTest);
if (InfoType == IT_VarTest) {
return PropagationInfo(VarTest.Var,
invertConsumedUnconsumed(VarTest.TestsFor));
} else if (InfoType == IT_BinTest) {
return PropagationInfo(BinTest.Source,
BinTest.EOp == EO_And ? EO_Or : EO_And,
BinTest.LTest.Var, invertConsumedUnconsumed(BinTest.LTest.TestsFor),
BinTest.RTest.Var, invertConsumedUnconsumed(BinTest.RTest.TestsFor));
} else {
return PropagationInfo();
}
}
};
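Note that invertTest() above applies De Morgan's law to binary tests: negating an EO_And of two variable tests yields an EO_Or of the two inverted tests, so (with the hypothetical File sketched earlier) !(a.isOpen() && b.isOpen()) carries the same information as !a.isOpen() || !b.isOpen().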
static inline void
setStateForVarOrTmp(ConsumedStateMap *StateMap, const PropagationInfo &PInfo,
ConsumedState State) {
assert(PInfo.isVar() || PInfo.isTmp());
if (PInfo.isVar())
StateMap->setState(PInfo.getVar(), State);
else
StateMap->setState(PInfo.getTmp(), State);
}
class ConsumedStmtVisitor : public ConstStmtVisitor<ConsumedStmtVisitor> {
typedef llvm::DenseMap<const Stmt *, PropagationInfo> MapType;
typedef std::pair<const Stmt *, PropagationInfo> PairType;
typedef MapType::iterator InfoEntry;
typedef MapType::const_iterator ConstInfoEntry;
AnalysisDeclContext &AC;
ConsumedAnalyzer &Analyzer;
ConsumedStateMap *StateMap;
MapType PropagationMap;
void forwardInfo(const Stmt *From, const Stmt *To);
bool isLikeMoveAssignment(const CXXMethodDecl *MethodDecl);
void propagateReturnType(const Stmt *Call, const FunctionDecl *Fun,
QualType ReturnType);
public:
void checkCallability(const PropagationInfo &PInfo,
const FunctionDecl *FunDecl,
SourceLocation BlameLoc);
void VisitBinaryOperator(const BinaryOperator *BinOp);
void VisitCallExpr(const CallExpr *Call);
void VisitCastExpr(const CastExpr *Cast);
void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Temp);
void VisitCXXConstructExpr(const CXXConstructExpr *Call);
void VisitCXXMemberCallExpr(const CXXMemberCallExpr *Call);
void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *Call);
void VisitDeclRefExpr(const DeclRefExpr *DeclRef);
void VisitDeclStmt(const DeclStmt *DeclS);
void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Temp);
void VisitMemberExpr(const MemberExpr *MExpr);
void VisitParmVarDecl(const ParmVarDecl *Param);
void VisitReturnStmt(const ReturnStmt *Ret);
void VisitUnaryOperator(const UnaryOperator *UOp);
void VisitVarDecl(const VarDecl *Var);
ConsumedStmtVisitor(AnalysisDeclContext &AC, ConsumedAnalyzer &Analyzer,
ConsumedStateMap *StateMap)
: AC(AC), Analyzer(Analyzer), StateMap(StateMap) {}
PropagationInfo getInfo(const Stmt *StmtNode) const {
ConstInfoEntry Entry = PropagationMap.find(StmtNode);
if (Entry != PropagationMap.end())
return Entry->second;
else
return PropagationInfo();
}
void reset(ConsumedStateMap *NewStateMap) {
StateMap = NewStateMap;
}
};
void ConsumedStmtVisitor::checkCallability(const PropagationInfo &PInfo,
const FunctionDecl *FunDecl,
SourceLocation BlameLoc) {
assert(!PInfo.isTest());
if (!FunDecl->hasAttr<CallableWhenAttr>())
return;
const CallableWhenAttr *CWAttr = FunDecl->getAttr<CallableWhenAttr>();
if (PInfo.isVar()) {
ConsumedState VarState = StateMap->getState(PInfo.getVar());
if (VarState == CS_None || isCallableInState(CWAttr, VarState))
return;
Analyzer.WarningsHandler.warnUseInInvalidState(
FunDecl->getNameAsString(), PInfo.getVar()->getNameAsString(),
stateToString(VarState), BlameLoc);
} else {
ConsumedState TmpState = PInfo.getAsState(StateMap);
if (TmpState == CS_None || isCallableInState(CWAttr, TmpState))
return;
Analyzer.WarningsHandler.warnUseOfTempInInvalidState(
FunDecl->getNameAsString(), stateToString(TmpState), BlameLoc);
}
}
void ConsumedStmtVisitor::forwardInfo(const Stmt *From, const Stmt *To) {
InfoEntry Entry = PropagationMap.find(From);
if (Entry != PropagationMap.end())
PropagationMap.insert(PairType(To, Entry->second));
}
bool ConsumedStmtVisitor::isLikeMoveAssignment(
const CXXMethodDecl *MethodDecl) {
return MethodDecl->isMoveAssignmentOperator() ||
(MethodDecl->getOverloadedOperator() == OO_Equal &&
MethodDecl->getNumParams() == 1 &&
MethodDecl->getParamDecl(0)->getType()->isRValueReferenceType());
}
void ConsumedStmtVisitor::propagateReturnType(const Stmt *Call,
const FunctionDecl *Fun,
QualType ReturnType) {
if (isConsumableType(ReturnType)) {
ConsumedState ReturnState;
if (Fun->hasAttr<ReturnTypestateAttr>())
ReturnState = mapReturnTypestateAttrState(
Fun->getAttr<ReturnTypestateAttr>());
else
ReturnState = mapConsumableAttrState(ReturnType);
PropagationMap.insert(PairType(Call, PropagationInfo(ReturnState)));
}
}
void ConsumedStmtVisitor::VisitBinaryOperator(const BinaryOperator *BinOp) {
switch (BinOp->getOpcode()) {
case BO_LAnd:
case BO_LOr : {
InfoEntry LEntry = PropagationMap.find(BinOp->getLHS()),
REntry = PropagationMap.find(BinOp->getRHS());
VarTestResult LTest, RTest;
if (LEntry != PropagationMap.end() && LEntry->second.isVarTest()) {
LTest = LEntry->second.getVarTest();
} else {
LTest.Var = NULL;
LTest.TestsFor = CS_None;
}
if (REntry != PropagationMap.end() && REntry->second.isVarTest()) {
RTest = REntry->second.getVarTest();
} else {
RTest.Var = NULL;
RTest.TestsFor = CS_None;
}
if (!(LTest.Var == NULL && RTest.Var == NULL))
PropagationMap.insert(PairType(BinOp, PropagationInfo(BinOp,
static_cast<EffectiveOp>(BinOp->getOpcode() == BO_LOr), LTest, RTest)));
break;
}
case BO_PtrMemD:
case BO_PtrMemI:
forwardInfo(BinOp->getLHS(), BinOp);
break;
default:
break;
}
}
+static bool isStdNamespace(const DeclContext *DC) {
+ if (!DC->isNamespace()) return false;
+ while (DC->getParent()->isNamespace())
+ DC = DC->getParent();
+ const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC);
+
+ return ND && ND->getName() == "std" &&
+ ND->getDeclContext()->isTranslationUnit();
+}
+
void ConsumedStmtVisitor::VisitCallExpr(const CallExpr *Call) {
if (const FunctionDecl *FunDecl =
dyn_cast_or_null<FunctionDecl>(Call->getDirectCallee())) {
// Special case for the std::move function.
// TODO: Make this more specific. (Deferred)
- if (FunDecl->getNameAsString() == "move") {
+ if (Call->getNumArgs() == 1 &&
+ FunDecl->getNameAsString() == "move" &&
+ isStdNamespace(FunDecl->getDeclContext())) {
forwardInfo(Call->getArg(0), Call);
return;
}
unsigned Offset = Call->getNumArgs() - FunDecl->getNumParams();
for (unsigned Index = Offset; Index < Call->getNumArgs(); ++Index) {
const ParmVarDecl *Param = FunDecl->getParamDecl(Index - Offset);
QualType ParamType = Param->getType();
InfoEntry Entry = PropagationMap.find(Call->getArg(Index));
if (Entry == PropagationMap.end() || Entry->second.isTest())
continue;
PropagationInfo PInfo = Entry->second;
// Check that the parameter is in the correct state.
if (Param->hasAttr<ParamTypestateAttr>()) {
ConsumedState ParamState = PInfo.getAsState(StateMap);
ConsumedState ExpectedState =
mapParamTypestateAttrState(Param->getAttr<ParamTypestateAttr>());
if (ParamState != ExpectedState)
Analyzer.WarningsHandler.warnParamTypestateMismatch(
Call->getArg(Index - Offset)->getExprLoc(),
stateToString(ExpectedState), stateToString(ParamState));
}
if (!(Entry->second.isVar() || Entry->second.isTmp()))
continue;
// Adjust state on the caller side.
if (isRValueRefish(ParamType)) {
setStateForVarOrTmp(StateMap, PInfo, consumed::CS_Consumed);
} else if (Param->hasAttr<ReturnTypestateAttr>()) {
setStateForVarOrTmp(StateMap, PInfo,
mapReturnTypestateAttrState(Param->getAttr<ReturnTypestateAttr>()));
} else if (!isValueType(ParamType) &&
!ParamType->getPointeeType().isConstQualified()) {
setStateForVarOrTmp(StateMap, PInfo, consumed::CS_Unknown);
}
}
QualType RetType = FunDecl->getCallResultType();
if (RetType->isReferenceType())
RetType = RetType->getPointeeType();
propagateReturnType(Call, FunDecl, RetType);
}
}
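The tightened check in this hunk changes which calls are treated as ownership transfers; an illustrative (hypothetical) contrast, reusing the File type sketched earlier:

  namespace mylib { template <typename T> T &&move(T &t); }
  void sink(File f) {
    File a = std::move(f);    // still forwarded: one argument, named "move",
                              // declared within namespace std
    File b = mylib::move(f);  // no longer matched after this revision
  }

Before this change, any callee named "move" was special-cased regardless of its namespace or argument count; isStdNamespace() now walks to the outermost enclosing namespace and requires it to be "std" at translation-unit scope.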
void ConsumedStmtVisitor::VisitCastExpr(const CastExpr *Cast) {
forwardInfo(Cast->getSubExpr(), Cast);
}
void ConsumedStmtVisitor::VisitCXXBindTemporaryExpr(
const CXXBindTemporaryExpr *Temp) {
InfoEntry Entry = PropagationMap.find(Temp->getSubExpr());
if (Entry != PropagationMap.end() && !Entry->second.isTest()) {
StateMap->setState(Temp, Entry->second.getAsState(StateMap));
PropagationMap.insert(PairType(Temp, PropagationInfo(Temp)));
}
}
void ConsumedStmtVisitor::VisitCXXConstructExpr(const CXXConstructExpr *Call) {
CXXConstructorDecl *Constructor = Call->getConstructor();
ASTContext &CurrContext = AC.getASTContext();
QualType ThisType = Constructor->getThisType(CurrContext)->getPointeeType();
if (!isConsumableType(ThisType))
return;
// FIXME: What should happen if someone annotates the move constructor?
if (Constructor->hasAttr<ReturnTypestateAttr>()) {
// TODO: Adjust state of args appropriately.
ReturnTypestateAttr *RTAttr = Constructor->getAttr<ReturnTypestateAttr>();
ConsumedState RetState = mapReturnTypestateAttrState(RTAttr);
PropagationMap.insert(PairType(Call, PropagationInfo(RetState)));
} else if (Constructor->isDefaultConstructor()) {
PropagationMap.insert(PairType(Call,
PropagationInfo(consumed::CS_Consumed)));
} else if (Constructor->isMoveConstructor()) {
InfoEntry Entry = PropagationMap.find(Call->getArg(0));
if (Entry != PropagationMap.end()) {
PropagationInfo PInfo = Entry->second;
if (PInfo.isVar()) {
const VarDecl* Var = PInfo.getVar();
PropagationMap.insert(PairType(Call,
PropagationInfo(StateMap->getState(Var))));
StateMap->setState(Var, consumed::CS_Consumed);
} else if (PInfo.isTmp()) {
const CXXBindTemporaryExpr *Tmp = PInfo.getTmp();
PropagationMap.insert(PairType(Call,
PropagationInfo(StateMap->getState(Tmp))));
StateMap->setState(Tmp, consumed::CS_Consumed);
} else {
PropagationMap.insert(PairType(Call, PInfo));
}
}
} else if (Constructor->isCopyConstructor()) {
forwardInfo(Call->getArg(0), Call);
} else {
// TODO: Adjust state of args appropriately.
ConsumedState RetState = mapConsumableAttrState(ThisType);
PropagationMap.insert(PairType(Call, PropagationInfo(RetState)));
}
}
void ConsumedStmtVisitor::VisitCXXMemberCallExpr(
const CXXMemberCallExpr *Call) {
VisitCallExpr(Call);
InfoEntry Entry = PropagationMap.find(Call->getCallee()->IgnoreParens());
if (Entry != PropagationMap.end()) {
PropagationInfo PInfo = Entry->second;
const CXXMethodDecl *MethodDecl = Call->getMethodDecl();
checkCallability(PInfo, MethodDecl, Call->getExprLoc());
if (PInfo.isVar()) {
if (isTestingFunction(MethodDecl))
PropagationMap.insert(PairType(Call,
PropagationInfo(PInfo.getVar(), testsFor(MethodDecl))));
else if (MethodDecl->hasAttr<SetTypestateAttr>())
StateMap->setState(PInfo.getVar(),
mapSetTypestateAttrState(MethodDecl->getAttr<SetTypestateAttr>()));
} else if (PInfo.isTmp() && MethodDecl->hasAttr<SetTypestateAttr>()) {
StateMap->setState(PInfo.getTmp(),
mapSetTypestateAttrState(MethodDecl->getAttr<SetTypestateAttr>()));
}
}
}
void ConsumedStmtVisitor::VisitCXXOperatorCallExpr(
const CXXOperatorCallExpr *Call) {
const FunctionDecl *FunDecl =
dyn_cast_or_null<FunctionDecl>(Call->getDirectCallee());
if (!FunDecl) return;
if (isa<CXXMethodDecl>(FunDecl) &&
isLikeMoveAssignment(cast<CXXMethodDecl>(FunDecl))) {
InfoEntry LEntry = PropagationMap.find(Call->getArg(0));
InfoEntry REntry = PropagationMap.find(Call->getArg(1));
PropagationInfo LPInfo, RPInfo;
if (LEntry != PropagationMap.end() &&
REntry != PropagationMap.end()) {
LPInfo = LEntry->second;
RPInfo = REntry->second;
if (LPInfo.isPointerToValue() && RPInfo.isPointerToValue()) {
setStateForVarOrTmp(StateMap, LPInfo, RPInfo.getAsState(StateMap));
PropagationMap.insert(PairType(Call, LPInfo));
setStateForVarOrTmp(StateMap, RPInfo, consumed::CS_Consumed);
} else if (RPInfo.isState()) {
setStateForVarOrTmp(StateMap, LPInfo, RPInfo.getState());
PropagationMap.insert(PairType(Call, LPInfo));
} else {
setStateForVarOrTmp(StateMap, RPInfo, consumed::CS_Consumed);
}
} else if (LEntry != PropagationMap.end() &&
REntry == PropagationMap.end()) {
LPInfo = LEntry->second;
assert(!LPInfo.isTest());
if (LPInfo.isPointerToValue()) {
setStateForVarOrTmp(StateMap, LPInfo, consumed::CS_Unknown);
PropagationMap.insert(PairType(Call, LPInfo));
} else {
PropagationMap.insert(PairType(Call,
PropagationInfo(consumed::CS_Unknown)));
}
} else if (LEntry == PropagationMap.end() &&
REntry != PropagationMap.end()) {
RPInfo = REntry->second;
if (RPInfo.isPointerToValue())
setStateForVarOrTmp(StateMap, RPInfo, consumed::CS_Consumed);
}
} else {
VisitCallExpr(Call);
InfoEntry Entry = PropagationMap.find(Call->getArg(0));
if (Entry != PropagationMap.end()) {
PropagationInfo PInfo = Entry->second;
checkCallability(PInfo, FunDecl, Call->getExprLoc());
if (PInfo.isVar()) {
if (isTestingFunction(FunDecl))
PropagationMap.insert(PairType(Call,
PropagationInfo(PInfo.getVar(), testsFor(FunDecl))));
else if (FunDecl->hasAttr<SetTypestateAttr>())
StateMap->setState(PInfo.getVar(),
mapSetTypestateAttrState(FunDecl->getAttr<SetTypestateAttr>()));
} else if (PInfo.isTmp() && FunDecl->hasAttr<SetTypestateAttr>()) {
StateMap->setState(PInfo.getTmp(),
mapSetTypestateAttrState(FunDecl->getAttr<SetTypestateAttr>()));
}
}
}
}
void ConsumedStmtVisitor::VisitDeclRefExpr(const DeclRefExpr *DeclRef) {
if (const VarDecl *Var = dyn_cast_or_null<VarDecl>(DeclRef->getDecl()))
if (StateMap->getState(Var) != consumed::CS_None)
PropagationMap.insert(PairType(DeclRef, PropagationInfo(Var)));
}
void ConsumedStmtVisitor::VisitDeclStmt(const DeclStmt *DeclS) {
for (DeclStmt::const_decl_iterator DI = DeclS->decl_begin(),
DE = DeclS->decl_end(); DI != DE; ++DI) {
if (isa<VarDecl>(*DI)) VisitVarDecl(cast<VarDecl>(*DI));
}
if (DeclS->isSingleDecl())
if (const VarDecl *Var = dyn_cast_or_null<VarDecl>(DeclS->getSingleDecl()))
PropagationMap.insert(PairType(DeclS, PropagationInfo(Var)));
}
void ConsumedStmtVisitor::VisitMaterializeTemporaryExpr(
const MaterializeTemporaryExpr *Temp) {
forwardInfo(Temp->GetTemporaryExpr(), Temp);
}
void ConsumedStmtVisitor::VisitMemberExpr(const MemberExpr *MExpr) {
forwardInfo(MExpr->getBase(), MExpr);
}
void ConsumedStmtVisitor::VisitParmVarDecl(const ParmVarDecl *Param) {
QualType ParamType = Param->getType();
ConsumedState ParamState = consumed::CS_None;
if (Param->hasAttr<ParamTypestateAttr>()) {
const ParamTypestateAttr *PTAttr = Param->getAttr<ParamTypestateAttr>();
ParamState = mapParamTypestateAttrState(PTAttr);
} else if (isConsumableType(ParamType)) {
ParamState = mapConsumableAttrState(ParamType);
} else if (isRValueRefish(ParamType) &&
isConsumableType(ParamType->getPointeeType())) {
ParamState = mapConsumableAttrState(ParamType->getPointeeType());
} else if (ParamType->isReferenceType() &&
isConsumableType(ParamType->getPointeeType())) {
ParamState = consumed::CS_Unknown;
}
if (ParamState != CS_None)
StateMap->setState(Param, ParamState);
}
void ConsumedStmtVisitor::VisitReturnStmt(const ReturnStmt *Ret) {
ConsumedState ExpectedState = Analyzer.getExpectedReturnState();
if (ExpectedState != CS_None) {
InfoEntry Entry = PropagationMap.find(Ret->getRetValue());
if (Entry != PropagationMap.end()) {
ConsumedState RetState = Entry->second.getAsState(StateMap);
if (RetState != ExpectedState)
Analyzer.WarningsHandler.warnReturnTypestateMismatch(
Ret->getReturnLoc(), stateToString(ExpectedState),
stateToString(RetState));
}
}
StateMap->checkParamsForReturnTypestate(Ret->getLocStart(),
Analyzer.WarningsHandler);
}
void ConsumedStmtVisitor::VisitUnaryOperator(const UnaryOperator *UOp) {
InfoEntry Entry = PropagationMap.find(UOp->getSubExpr()->IgnoreParens());
if (Entry == PropagationMap.end()) return;
switch (UOp->getOpcode()) {
case UO_AddrOf:
PropagationMap.insert(PairType(UOp, Entry->second));
break;
case UO_LNot:
if (Entry->second.isTest())
PropagationMap.insert(PairType(UOp, Entry->second.invertTest()));
break;
default:
break;
}
}
// TODO: See if I need to check for reference types here.
void ConsumedStmtVisitor::VisitVarDecl(const VarDecl *Var) {
if (isConsumableType(Var->getType())) {
if (Var->hasInit()) {
MapType::iterator VIT = PropagationMap.find(
Var->getInit()->IgnoreImplicit());
if (VIT != PropagationMap.end()) {
PropagationInfo PInfo = VIT->second;
ConsumedState St = PInfo.getAsState(StateMap);
if (St != consumed::CS_None) {
StateMap->setState(Var, St);
return;
}
}
}
// Otherwise
StateMap->setState(Var, consumed::CS_Unknown);
}
}
}} // end namespace clang::consumed
namespace clang {
namespace consumed {
void splitVarStateForIf(const IfStmt * IfNode, const VarTestResult &Test,
ConsumedStateMap *ThenStates,
ConsumedStateMap *ElseStates) {
ConsumedState VarState = ThenStates->getState(Test.Var);
if (VarState == CS_Unknown) {
ThenStates->setState(Test.Var, Test.TestsFor);
ElseStates->setState(Test.Var, invertConsumedUnconsumed(Test.TestsFor));
} else if (VarState == invertConsumedUnconsumed(Test.TestsFor)) {
ThenStates->markUnreachable();
} else if (VarState == Test.TestsFor) {
ElseStates->markUnreachable();
}
}
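A worked illustration (hypothetical guard, again using the File sketch): for

  if (f.isOpen()) { f.write("x"); } else { f = File(); }

where isOpen() tests for CS_Unconsumed and f is currently CS_Unknown, the then-branch map records f as unconsumed and the else-branch map records it as consumed. If f were instead already known to be consumed, the then-branch would be marked unreachable, and vice versa.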
void splitVarStateForIfBinOp(const PropagationInfo &PInfo,
ConsumedStateMap *ThenStates, ConsumedStateMap *ElseStates) {
const VarTestResult &LTest = PInfo.getLTest(),
&RTest = PInfo.getRTest();
ConsumedState LState = LTest.Var ? ThenStates->getState(LTest.Var) : CS_None,
RState = RTest.Var ? ThenStates->getState(RTest.Var) : CS_None;
if (LTest.Var) {
if (PInfo.testEffectiveOp() == EO_And) {
if (LState == CS_Unknown) {
ThenStates->setState(LTest.Var, LTest.TestsFor);
} else if (LState == invertConsumedUnconsumed(LTest.TestsFor)) {
ThenStates->markUnreachable();
} else if (LState == LTest.TestsFor && isKnownState(RState)) {
if (RState == RTest.TestsFor)
ElseStates->markUnreachable();
else
ThenStates->markUnreachable();
}
} else {
if (LState == CS_Unknown) {
ElseStates->setState(LTest.Var,
invertConsumedUnconsumed(LTest.TestsFor));
} else if (LState == LTest.TestsFor) {
ElseStates->markUnreachable();
} else if (LState == invertConsumedUnconsumed(LTest.TestsFor) &&
isKnownState(RState)) {
if (RState == RTest.TestsFor)
ElseStates->markUnreachable();
else
ThenStates->markUnreachable();
}
}
}
if (RTest.Var) {
if (PInfo.testEffectiveOp() == EO_And) {
if (RState == CS_Unknown)
ThenStates->setState(RTest.Var, RTest.TestsFor);
else if (RState == invertConsumedUnconsumed(RTest.TestsFor))
ThenStates->markUnreachable();
} else {
if (RState == CS_Unknown)
ElseStates->setState(RTest.Var,
invertConsumedUnconsumed(RTest.TestsFor));
else if (RState == RTest.TestsFor)
ElseStates->markUnreachable();
}
}
}
bool ConsumedBlockInfo::allBackEdgesVisited(const CFGBlock *CurrBlock,
const CFGBlock *TargetBlock) {
assert(CurrBlock && "Block pointer must not be NULL");
assert(TargetBlock && "TargetBlock pointer must not be NULL");
unsigned int CurrBlockOrder = VisitOrder[CurrBlock->getBlockID()];
for (CFGBlock::const_pred_iterator PI = TargetBlock->pred_begin(),
PE = TargetBlock->pred_end(); PI != PE; ++PI) {
if (*PI && CurrBlockOrder < VisitOrder[(*PI)->getBlockID()] )
return false;
}
return true;
}
void ConsumedBlockInfo::addInfo(const CFGBlock *Block,
ConsumedStateMap *StateMap,
bool &AlreadyOwned) {
assert(Block && "Block pointer must not be NULL");
ConsumedStateMap *Entry = StateMapsArray[Block->getBlockID()];
if (Entry) {
Entry->intersect(StateMap);
} else if (AlreadyOwned) {
StateMapsArray[Block->getBlockID()] = new ConsumedStateMap(*StateMap);
} else {
StateMapsArray[Block->getBlockID()] = StateMap;
AlreadyOwned = true;
}
}
void ConsumedBlockInfo::addInfo(const CFGBlock *Block,
ConsumedStateMap *StateMap) {
assert(Block != NULL && "Block pointer must not be NULL");
ConsumedStateMap *Entry = StateMapsArray[Block->getBlockID()];
if (Entry) {
Entry->intersect(StateMap);
delete StateMap;
} else {
StateMapsArray[Block->getBlockID()] = StateMap;
}
}
ConsumedStateMap* ConsumedBlockInfo::borrowInfo(const CFGBlock *Block) {
assert(Block && "Block pointer must not be NULL");
assert(StateMapsArray[Block->getBlockID()] && "Block has no block info");
return StateMapsArray[Block->getBlockID()];
}
void ConsumedBlockInfo::discardInfo(const CFGBlock *Block) {
unsigned int BlockID = Block->getBlockID();
delete StateMapsArray[BlockID];
StateMapsArray[BlockID] = NULL;
}
ConsumedStateMap* ConsumedBlockInfo::getInfo(const CFGBlock *Block) {
assert(Block && "Block pointer must not be NULL");
ConsumedStateMap *StateMap = StateMapsArray[Block->getBlockID()];
if (isBackEdgeTarget(Block)) {
return new ConsumedStateMap(*StateMap);
} else {
StateMapsArray[Block->getBlockID()] = NULL;
return StateMap;
}
}
bool ConsumedBlockInfo::isBackEdge(const CFGBlock *From, const CFGBlock *To) {
assert(From && "From block must not be NULL");
assert(To && "To block must not be NULL");
return VisitOrder[From->getBlockID()] > VisitOrder[To->getBlockID()];
}
bool ConsumedBlockInfo::isBackEdgeTarget(const CFGBlock *Block) {
assert(Block != NULL && "Block pointer must not be NULL");
// Anything with fewer than two predecessors can't be the target of a back
// edge.
if (Block->pred_size() < 2)
return false;
unsigned int BlockVisitOrder = VisitOrder[Block->getBlockID()];
for (CFGBlock::const_pred_iterator PI = Block->pred_begin(),
PE = Block->pred_end(); PI != PE; ++PI) {
if (*PI && BlockVisitOrder < VisitOrder[(*PI)->getBlockID()])
return true;
}
return false;
}
void ConsumedStateMap::checkParamsForReturnTypestate(SourceLocation BlameLoc,
ConsumedWarningsHandlerBase &WarningsHandler) const {
ConsumedState ExpectedState;
for (VarMapType::const_iterator DMI = VarMap.begin(), DME = VarMap.end();
DMI != DME; ++DMI) {
if (isa<ParmVarDecl>(DMI->first)) {
const ParmVarDecl *Param = cast<ParmVarDecl>(DMI->first);
if (!Param->hasAttr<ReturnTypestateAttr>()) continue;
ExpectedState =
mapReturnTypestateAttrState(Param->getAttr<ReturnTypestateAttr>());
if (DMI->second != ExpectedState) {
WarningsHandler.warnParamReturnTypestateMismatch(BlameLoc,
Param->getNameAsString(), stateToString(ExpectedState),
stateToString(DMI->second));
}
}
}
}
void ConsumedStateMap::clearTemporaries() {
TmpMap.clear();
}
ConsumedState ConsumedStateMap::getState(const VarDecl *Var) const {
VarMapType::const_iterator Entry = VarMap.find(Var);
if (Entry != VarMap.end())
return Entry->second;
return CS_None;
}
ConsumedState
ConsumedStateMap::getState(const CXXBindTemporaryExpr *Tmp) const {
TmpMapType::const_iterator Entry = TmpMap.find(Tmp);
if (Entry != TmpMap.end())
return Entry->second;
return CS_None;
}
void ConsumedStateMap::intersect(const ConsumedStateMap *Other) {
ConsumedState LocalState;
if (this->From && this->From == Other->From && !Other->Reachable) {
this->markUnreachable();
return;
}
for (VarMapType::const_iterator DMI = Other->VarMap.begin(),
DME = Other->VarMap.end(); DMI != DME; ++DMI) {
LocalState = this->getState(DMI->first);
if (LocalState == CS_None)
continue;
if (LocalState != DMI->second)
VarMap[DMI->first] = CS_Unknown;
}
}
void ConsumedStateMap::intersectAtLoopHead(const CFGBlock *LoopHead,
const CFGBlock *LoopBack, const ConsumedStateMap *LoopBackStates,
ConsumedWarningsHandlerBase &WarningsHandler) {
ConsumedState LocalState;
SourceLocation BlameLoc = getLastStmtLoc(LoopBack);
for (VarMapType::const_iterator DMI = LoopBackStates->VarMap.begin(),
DME = LoopBackStates->VarMap.end(); DMI != DME; ++DMI) {
LocalState = this->getState(DMI->first);
if (LocalState == CS_None)
continue;
if (LocalState != DMI->second) {
VarMap[DMI->first] = CS_Unknown;
WarningsHandler.warnLoopStateMismatch(
BlameLoc, DMI->first->getNameAsString());
}
}
}
void ConsumedStateMap::markUnreachable() {
this->Reachable = false;
VarMap.clear();
TmpMap.clear();
}
void ConsumedStateMap::setState(const VarDecl *Var, ConsumedState State) {
VarMap[Var] = State;
}
void ConsumedStateMap::setState(const CXXBindTemporaryExpr *Tmp,
ConsumedState State) {
TmpMap[Tmp] = State;
}
void ConsumedStateMap::remove(const VarDecl *Var) {
VarMap.erase(Var);
}
bool ConsumedStateMap::operator!=(const ConsumedStateMap *Other) const {
for (VarMapType::const_iterator DMI = Other->VarMap.begin(),
DME = Other->VarMap.end(); DMI != DME; ++DMI) {
if (this->getState(DMI->first) != DMI->second)
return true;
}
return false;
}
void ConsumedAnalyzer::determineExpectedReturnState(AnalysisDeclContext &AC,
const FunctionDecl *D) {
QualType ReturnType;
if (const CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
ASTContext &CurrContext = AC.getASTContext();
ReturnType = Constructor->getThisType(CurrContext)->getPointeeType();
} else
ReturnType = D->getCallResultType();
if (D->hasAttr<ReturnTypestateAttr>()) {
const ReturnTypestateAttr *RTSAttr = D->getAttr<ReturnTypestateAttr>();
const CXXRecordDecl *RD = ReturnType->getAsCXXRecordDecl();
if (!RD || !RD->hasAttr<ConsumableAttr>()) {
// FIXME: This should be removed when template instantiation propagates
// attributes at template specialization definition, not
// declaration. When it is removed the test needs to be enabled
// in SemaDeclAttr.cpp.
WarningsHandler.warnReturnTypestateForUnconsumableType(
RTSAttr->getLocation(), ReturnType.getAsString());
ExpectedReturnState = CS_None;
} else
ExpectedReturnState = mapReturnTypestateAttrState(RTSAttr);
} else if (isConsumableType(ReturnType))
ExpectedReturnState = mapConsumableAttrState(ReturnType);
else
ExpectedReturnState = CS_None;
}
bool ConsumedAnalyzer::splitState(const CFGBlock *CurrBlock,
const ConsumedStmtVisitor &Visitor) {
OwningPtr<ConsumedStateMap> FalseStates(new ConsumedStateMap(*CurrStates));
PropagationInfo PInfo;
if (const IfStmt *IfNode =
dyn_cast_or_null<IfStmt>(CurrBlock->getTerminator().getStmt())) {
const Stmt *Cond = IfNode->getCond();
PInfo = Visitor.getInfo(Cond);
if (!PInfo.isValid() && isa<BinaryOperator>(Cond))
PInfo = Visitor.getInfo(cast<BinaryOperator>(Cond)->getRHS());
if (PInfo.isVarTest()) {
CurrStates->setSource(Cond);
FalseStates->setSource(Cond);
splitVarStateForIf(IfNode, PInfo.getVarTest(), CurrStates,
FalseStates.get());
} else if (PInfo.isBinTest()) {
CurrStates->setSource(PInfo.testSourceNode());
FalseStates->setSource(PInfo.testSourceNode());
splitVarStateForIfBinOp(PInfo, CurrStates, FalseStates.get());
} else {
return false;
}
} else if (const BinaryOperator *BinOp =
dyn_cast_or_null<BinaryOperator>(CurrBlock->getTerminator().getStmt())) {
PInfo = Visitor.getInfo(BinOp->getLHS());
if (!PInfo.isVarTest()) {
if ((BinOp = dyn_cast_or_null<BinaryOperator>(BinOp->getLHS()))) {
PInfo = Visitor.getInfo(BinOp->getRHS());
if (!PInfo.isVarTest())
return false;
} else {
return false;
}
}
CurrStates->setSource(BinOp);
FalseStates->setSource(BinOp);
const VarTestResult &Test = PInfo.getVarTest();
ConsumedState VarState = CurrStates->getState(Test.Var);
if (BinOp->getOpcode() == BO_LAnd) {
if (VarState == CS_Unknown)
CurrStates->setState(Test.Var, Test.TestsFor);
else if (VarState == invertConsumedUnconsumed(Test.TestsFor))
CurrStates->markUnreachable();
} else if (BinOp->getOpcode() == BO_LOr) {
if (VarState == CS_Unknown)
FalseStates->setState(Test.Var,
invertConsumedUnconsumed(Test.TestsFor));
else if (VarState == Test.TestsFor)
FalseStates->markUnreachable();
}
} else {
return false;
}
CFGBlock::const_succ_iterator SI = CurrBlock->succ_begin();
if (*SI)
BlockInfo.addInfo(*SI, CurrStates);
else
delete CurrStates;
if (*++SI)
BlockInfo.addInfo(*SI, FalseStates.take());
CurrStates = NULL;
return true;
}
void ConsumedAnalyzer::run(AnalysisDeclContext &AC) {
const FunctionDecl *D = dyn_cast_or_null<FunctionDecl>(AC.getDecl());
if (!D)
return;
CFG *CFGraph = AC.getCFG();
if (!CFGraph)
return;
determineExpectedReturnState(AC, D);
PostOrderCFGView *SortedGraph = AC.getAnalysis<PostOrderCFGView>();
// AC.getCFG()->viewCFG(LangOptions());
BlockInfo = ConsumedBlockInfo(CFGraph->getNumBlockIDs(), SortedGraph);
CurrStates = new ConsumedStateMap();
ConsumedStmtVisitor Visitor(AC, *this, CurrStates);
// Add all trackable parameters to the state map.
for (FunctionDecl::param_const_iterator PI = D->param_begin(),
PE = D->param_end(); PI != PE; ++PI) {
Visitor.VisitParmVarDecl(*PI);
}
// Visit all of the function's basic blocks.
for (PostOrderCFGView::iterator I = SortedGraph->begin(),
E = SortedGraph->end(); I != E; ++I) {
const CFGBlock *CurrBlock = *I;
if (CurrStates == NULL)
CurrStates = BlockInfo.getInfo(CurrBlock);
if (!CurrStates) {
continue;
} else if (!CurrStates->isReachable()) {
delete CurrStates;
CurrStates = NULL;
continue;
}
Visitor.reset(CurrStates);
// Visit all of the basic block's statements.
for (CFGBlock::const_iterator BI = CurrBlock->begin(),
BE = CurrBlock->end(); BI != BE; ++BI) {
switch (BI->getKind()) {
case CFGElement::Statement:
Visitor.Visit(BI->castAs<CFGStmt>().getStmt());
break;
case CFGElement::TemporaryDtor: {
const CFGTemporaryDtor DTor = BI->castAs<CFGTemporaryDtor>();
const CXXBindTemporaryExpr *BTE = DTor.getBindTemporaryExpr();
Visitor.checkCallability(PropagationInfo(BTE),
DTor.getDestructorDecl(AC.getASTContext()),
BTE->getExprLoc());
break;
}
case CFGElement::AutomaticObjectDtor: {
const CFGAutomaticObjDtor DTor = BI->castAs<CFGAutomaticObjDtor>();
SourceLocation Loc = DTor.getTriggerStmt()->getLocEnd();
const VarDecl *Var = DTor.getVarDecl();
Visitor.checkCallability(PropagationInfo(Var),
DTor.getDestructorDecl(AC.getASTContext()),
Loc);
break;
}
default:
break;
}
}
CurrStates->clearTemporaries();
// TODO: Handle other forms of branching with precision, including while-
// and for-loops. (Deferred)
if (!splitState(CurrBlock, Visitor)) {
CurrStates->setSource(NULL);
if (CurrBlock->succ_size() > 1 ||
(CurrBlock->succ_size() == 1 &&
(*CurrBlock->succ_begin())->pred_size() > 1)) {
bool OwnershipTaken = false;
for (CFGBlock::const_succ_iterator SI = CurrBlock->succ_begin(),
SE = CurrBlock->succ_end(); SI != SE; ++SI) {
if (*SI == NULL) continue;
if (BlockInfo.isBackEdge(CurrBlock, *SI)) {
BlockInfo.borrowInfo(*SI)->intersectAtLoopHead(*SI, CurrBlock,
CurrStates,
WarningsHandler);
if (BlockInfo.allBackEdgesVisited(*SI, CurrBlock))
BlockInfo.discardInfo(*SI);
} else {
BlockInfo.addInfo(*SI, CurrStates, OwnershipTaken);
}
}
if (!OwnershipTaken)
delete CurrStates;
CurrStates = NULL;
}
}
if (CurrBlock == &AC.getCFG()->getExit() &&
D->getCallResultType()->isVoidType())
CurrStates->checkParamsForReturnTypestate(D->getLocation(),
WarningsHandler);
} // End of block iterator.
// Delete the last existing state map.
delete CurrStates;
WarningsHandler.emitDiagnostics();
}
}} // end namespace clang::consumed
Index: head/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Basic/Targets.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Basic/Targets.cpp (revision 265925)
@@ -1,5925 +1,5935 @@
//===--- Targets.cpp - Implement -arch option and targets -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements construction of a TargetInfo object from a
// target triple.
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
using namespace clang;
//===----------------------------------------------------------------------===//
// Common code shared among targets.
//===----------------------------------------------------------------------===//
/// DefineStd - Define a macro name and standard variants. For example if
/// MacroName is "unix", then this will define "__unix", "__unix__", and "unix"
/// when in GNU mode.
static void DefineStd(MacroBuilder &Builder, StringRef MacroName,
const LangOptions &Opts) {
assert(MacroName[0] != '_' && "Identifier should be in the user's namespace");
// If in GNU mode (e.g. -std=gnu99 but not -std=c99) define the raw identifier
// in the user's namespace.
if (Opts.GNUMode)
Builder.defineMacro(MacroName);
// Define __unix.
Builder.defineMacro("__" + MacroName);
// Define __unix__.
Builder.defineMacro("__" + MacroName + "__");
}
static void defineCPUMacros(MacroBuilder &Builder, StringRef CPUName,
bool Tuning = true) {
Builder.defineMacro("__" + CPUName);
Builder.defineMacro("__" + CPUName + "__");
if (Tuning)
Builder.defineMacro("__tune_" + CPUName + "__");
}
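For example (a hypothetical call), defineCPUMacros(Builder, "k8") defines __k8, __k8__, and __tune_k8__, while passing Tuning = false suppresses the __tune_* variant.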
//===----------------------------------------------------------------------===//
// Defines specific to certain operating systems.
//===----------------------------------------------------------------------===//
namespace {
template<typename TgtInfo>
class OSTargetInfo : public TgtInfo {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const=0;
public:
OSTargetInfo(const llvm::Triple &Triple) : TgtInfo(Triple) {}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
TgtInfo::getTargetDefines(Opts, Builder);
getOSDefines(Opts, TgtInfo::getTriple(), Builder);
}
};
} // end anonymous namespace
static void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
const llvm::Triple &Triple,
StringRef &PlatformName,
VersionTuple &PlatformMinVersion) {
Builder.defineMacro("__APPLE_CC__", "6000");
Builder.defineMacro("__APPLE__");
Builder.defineMacro("__MACH__");
Builder.defineMacro("OBJC_NEW_PROPERTIES");
// AddressSanitizer doesn't play well with source fortification, which is on
// by default on Darwin.
if (Opts.Sanitize.Address) Builder.defineMacro("_FORTIFY_SOURCE", "0");
if (!Opts.ObjCAutoRefCount) {
// __weak is always defined, for use in blocks and with objc pointers.
Builder.defineMacro("__weak", "__attribute__((objc_gc(weak)))");
// Darwin defines __strong even in C mode (just to nothing).
if (Opts.getGC() != LangOptions::NonGC)
Builder.defineMacro("__strong", "__attribute__((objc_gc(strong)))");
else
Builder.defineMacro("__strong", "");
// __unsafe_unretained is defined to nothing in non-ARC mode. We even
// allow this in C, since one might have block pointers in structs that
// are used in pure C code and in Objective-C ARC.
Builder.defineMacro("__unsafe_unretained", "");
}
if (Opts.Static)
Builder.defineMacro("__STATIC__");
else
Builder.defineMacro("__DYNAMIC__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
// Get the platform type and version number from the triple.
unsigned Maj, Min, Rev;
if (Triple.isMacOSX()) {
Triple.getMacOSXVersion(Maj, Min, Rev);
PlatformName = "macosx";
} else {
Triple.getOSVersion(Maj, Min, Rev);
PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
}
// If the -target arch-pc-win32-macho option is specified, we're
// generating code for the Win32 ABI. No need to emit
// __ENVIRONMENT_XX_OS_VERSION_MIN_REQUIRED__.
if (PlatformName == "win32") {
PlatformMinVersion = VersionTuple(Maj, Min, Rev);
return;
}
// If there's an environment specified in the triple, that means we're dealing
// with an embedded variant of some sort and don't want the platform
// version-min defines, so only add them if there's not one.
if (Triple.getEnvironmentName().empty()) {
// Set the appropriate OS version define.
if (Triple.isiOS()) {
assert(Maj < 10 && Min < 100 && Rev < 100 && "Invalid version!");
char Str[6];
Str[0] = '0' + Maj;
Str[1] = '0' + (Min / 10);
Str[2] = '0' + (Min % 10);
Str[3] = '0' + (Rev / 10);
Str[4] = '0' + (Rev % 10);
Str[5] = '\0';
Builder.defineMacro("__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__",
Str);
} else {
// Note that the Driver allows versions which aren't representable in the
// define (because we only get a single digit for the minor and micro
// revision numbers). So, we limit them to the maximum representable
// version.
assert(Triple.getEnvironmentName().empty() && "Invalid environment!");
assert(Maj < 100 && Min < 100 && Rev < 100 && "Invalid version!");
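// Here the major number gets two digits, so e.g. 10.9 becomes "1090".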
char Str[5];
Str[0] = '0' + (Maj / 10);
Str[1] = '0' + (Maj % 10);
Str[2] = '0' + std::min(Min, 9U);
Str[3] = '0' + std::min(Rev, 9U);
Str[4] = '\0';
Builder.defineMacro("__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__", Str);
}
}
PlatformMinVersion = VersionTuple(Maj, Min, Rev);
}
namespace {
template<typename Target>
class DarwinTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
getDarwinDefines(Builder, Opts, Triple, this->PlatformName,
this->PlatformMinVersion);
}
public:
DarwinTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
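// Thread-local storage is only available on OS X 10.7 and later.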
this->TLSSupported = Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 7);
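// The leading \01 tells LLVM to emit the symbol name exactly as written,
// without prepending the target's user-label prefix.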
this->MCountName = "\01mcount";
}
virtual std::string isValidSectionSpecifier(StringRef SR) const {
// Let MCSectionMachO validate this.
StringRef Segment, Section;
unsigned TAA, StubSize;
bool HasTAA;
return llvm::MCSectionMachO::ParseSectionSpecifier(SR, Segment, Section,
TAA, HasTAA, StubSize);
}
virtual const char *getStaticInitSectionSpecifier() const {
// FIXME: We should return 0 when building kexts.
return "__TEXT,__StaticInit,regular,pure_instructions";
}
/// Darwin does not support protected visibility. Darwin's "default"
/// is very similar to ELF's "protected"; Darwin requires a "weak"
/// attribute on declarations that can be dynamically replaced.
virtual bool hasProtectedVisibility() const {
return false;
}
};
// DragonFlyBSD Target
template<typename Target>
class DragonFlyBSDTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// DragonFly defines; list based off of gcc output
Builder.defineMacro("__DragonFly__");
Builder.defineMacro("__DragonFly_cc_version", "100001");
Builder.defineMacro("__ELF__");
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
Builder.defineMacro("__tune_i386__");
DefineStd(Builder, "unix", Opts);
}
public:
DragonFlyBSDTargetInfo(const llvm::Triple &Triple)
: OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
break;
}
}
};
// FreeBSD Target
template<typename Target>
class FreeBSDTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// FreeBSD defines; list based off of gcc output
unsigned Release = Triple.getOSMajorVersion();
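// Default to FreeBSD 8 if the triple carries no version; e.g. a
// freebsd10 triple yields __FreeBSD__=10 and __FreeBSD_cc_version=1000001.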
if (Release == 0U)
Release = 8;
Builder.defineMacro("__FreeBSD__", Twine(Release));
Builder.defineMacro("__FreeBSD_cc_version", Twine(Release * 100000U + 1U));
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
// On FreeBSD, wchar_t contains the number of the code point as
// used by the character set of the locale. These character sets are
// not necessarily a superset of ASCII.
Builder.defineMacro("__STDC_MB_MIGHT_NEQ_WC__", "1");
}
public:
FreeBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
this->MCountName = "_mcount";
break;
case llvm::Triple::arm:
this->MCountName = "__mcount";
break;
}
}
};
// GNU/kFreeBSD Target
template<typename Target>
class KFreeBSDTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// GNU/kFreeBSD defines; list based off of gcc output
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__FreeBSD_kernel__");
Builder.defineMacro("__GLIBC__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
KFreeBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
}
};
// Minix Target
template<typename Target>
class MinixTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// Minix defines
Builder.defineMacro("__minix", "3");
Builder.defineMacro("_EM_WSIZE", "4");
Builder.defineMacro("_EM_PSIZE", "4");
Builder.defineMacro("_EM_SSIZE", "2");
Builder.defineMacro("_EM_LSIZE", "4");
Builder.defineMacro("_EM_FSIZE", "4");
Builder.defineMacro("_EM_DSIZE", "8");
Builder.defineMacro("__ELF__");
DefineStd(Builder, "unix", Opts);
}
public:
MinixTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
}
};
// Linux target
template<typename Target>
class LinuxTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// Linux defines; list based off of gcc output
DefineStd(Builder, "unix", Opts);
DefineStd(Builder, "linux", Opts);
Builder.defineMacro("__gnu_linux__");
Builder.defineMacro("__ELF__");
if (Triple.getEnvironment() == llvm::Triple::Android)
Builder.defineMacro("__ANDROID__", "1");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
LinuxTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
this->WIntType = TargetInfo::UnsignedInt;
+
+ switch (Triple.getArch()) {
+ default:
+ break;
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ppc64le:
+ this->MCountName = "_mcount";
+ break;
+ }
}
virtual const char *getStaticInitSectionSpecifier() const {
return ".text.startup";
}
};
// NetBSD Target
template<typename Target>
class NetBSDTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// NetBSD defines; list based off of gcc output
Builder.defineMacro("__NetBSD__");
Builder.defineMacro("__unix__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_POSIX_THREADS");
}
public:
NetBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
}
};
// OpenBSD Target
template<typename Target>
class OpenBSDTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// OpenBSD defines; list based off of gcc output
Builder.defineMacro("__OpenBSD__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
}
public:
OpenBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
this->TLSSupported = false;
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
case llvm::Triple::arm:
case llvm::Triple::sparc:
this->MCountName = "__mcount";
break;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::ppc:
case llvm::Triple::sparcv9:
this->MCountName = "_mcount";
break;
}
}
};
// Bitrig Target
template<typename Target>
class BitrigTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// Bitrig defines; list based off of gcc output
Builder.defineMacro("__Bitrig__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
}
public:
BitrigTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
this->TLSSupported = false;
this->MCountName = "__mcount";
}
};
// PSP Target
template<typename Target>
class PSPTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// PSP defines; list based on the output of the pspdev gcc toolchain.
Builder.defineMacro("PSP");
Builder.defineMacro("_PSP");
Builder.defineMacro("__psp__");
Builder.defineMacro("__ELF__");
}
public:
PSPTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
}
};
// PS3 PPU Target
template<typename Target>
class PS3PPUTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// PS3 PPU defines.
Builder.defineMacro("__PPC__");
Builder.defineMacro("__PPU__");
Builder.defineMacro("__CELLOS_LV2__");
Builder.defineMacro("__ELF__");
Builder.defineMacro("__LP32__");
Builder.defineMacro("_ARCH_PPC64");
Builder.defineMacro("__powerpc64__");
}
public:
PS3PPUTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
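// The PPU uses an ILP32 data model on a 64-bit ISA: long and pointers
// are 32 bits, while the 64-bit types stay on long long.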
this->LongWidth = this->LongAlign = 32;
this->PointerWidth = this->PointerAlign = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->UIntMaxType = TargetInfo::UnsignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->SizeType = TargetInfo::UnsignedInt;
this->DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32";
}
};
// FIXME: Need a real SPU target.
// PS3 SPU Target
template<typename Target>
class PS3SPUTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// PS3 PPU defines.
Builder.defineMacro("__SPU__");
Builder.defineMacro("__ELF__");
}
public:
PS3SPUTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
}
};
// AuroraUX target
template<typename Target>
class AuroraUXTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
DefineStd(Builder, "sun", Opts);
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__svr4__");
Builder.defineMacro("__SVR4");
}
public:
AuroraUXTargetInfo(const llvm::Triple &Triple)
: OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
this->WCharType = this->SignedLong;
// FIXME: WIntType should be SignedLong
}
};
// Solaris target
template<typename Target>
class SolarisTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
DefineStd(Builder, "sun", Opts);
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__svr4__");
Builder.defineMacro("__SVR4");
// Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and
// newer, but to 500 for everything else. feature_test.h has a check to
// ensure that you are not using C99 with an old version of X/Open or C89
// with a new version.
if (Opts.C99 || Opts.C11)
Builder.defineMacro("_XOPEN_SOURCE", "600");
else
Builder.defineMacro("_XOPEN_SOURCE", "500");
if (Opts.CPlusPlus)
Builder.defineMacro("__C99FEATURES__");
Builder.defineMacro("_LARGEFILE_SOURCE");
Builder.defineMacro("_LARGEFILE64_SOURCE");
Builder.defineMacro("__EXTENSIONS__");
Builder.defineMacro("_REENTRANT");
}
public:
SolarisTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
this->WCharType = this->SignedInt;
// FIXME: WIntType should be SignedLong
}
};
// Windows target
template<typename Target>
class WindowsTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
Builder.defineMacro("_WIN32");
}
void getVisualStudioDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
if (Opts.CPlusPlus) {
if (Opts.RTTI)
Builder.defineMacro("_CPPRTTI");
if (Opts.Exceptions)
Builder.defineMacro("_CPPUNWIND");
}
if (!Opts.CharIsSigned)
Builder.defineMacro("_CHAR_UNSIGNED");
// FIXME: POSIXThreads isn't exactly the option this should be defined for,
// but it works for now.
if (Opts.POSIXThreads)
Builder.defineMacro("_MT");
if (Opts.MSCVersion != 0)
Builder.defineMacro("_MSC_VER", Twine(Opts.MSCVersion));
if (Opts.MicrosoftExt) {
Builder.defineMacro("_MSC_EXTENSIONS");
if (Opts.CPlusPlus11) {
Builder.defineMacro("_RVALUE_REFERENCES_V2_SUPPORTED");
Builder.defineMacro("_RVALUE_REFERENCES_SUPPORTED");
Builder.defineMacro("_NATIVE_NULLPTR_SUPPORTED");
}
}
Builder.defineMacro("_INTEGRAL_MAX_BITS", "64");
}
public:
WindowsTargetInfo(const llvm::Triple &Triple)
: OSTargetInfo<Target>(Triple) {}
};
template <typename Target>
class NaClTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__native_client__");
}
public:
NaClTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
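// NaCl uses an ILP32 model on every architecture: 32-bit long, pointer
// and size_t, with long double reduced to the IEEE double format.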
this->LongAlign = 32;
this->LongWidth = 32;
this->PointerAlign = 32;
this->PointerWidth = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->UIntMaxType = TargetInfo::UnsignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->DoubleAlign = 64;
this->LongDoubleWidth = 64;
this->LongDoubleAlign = 64;
this->SizeType = TargetInfo::UnsignedInt;
this->PtrDiffType = TargetInfo::SignedInt;
this->IntPtrType = TargetInfo::SignedInt;
// RegParmMax is inherited from the underlying architecture
this->LongDoubleFormat = &llvm::APFloat::IEEEdouble;
this->DescriptionString = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
"f32:32:32-f64:64:64-p:32:32:32-v128:32:32";
}
virtual typename Target::CallingConvCheckResult checkCallingConvention(
CallingConv CC) const {
return CC == CC_PnaclCall ? Target::CCCR_OK :
Target::checkCallingConvention(CC);
}
};
} // end anonymous namespace.
//===----------------------------------------------------------------------===//
// Specific target implementations.
//===----------------------------------------------------------------------===//
namespace {
// PPC abstract base class
class PPCTargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
static const char * const GCCRegNames[];
static const TargetInfo::GCCRegAlias GCCRegAliases[];
std::string CPU;
// Target cpu features.
bool HasVSX;
public:
PPCTargetInfo(const llvm::Triple &Triple)
: TargetInfo(Triple), HasVSX(false) {
BigEndian = (Triple.getArch() != llvm::Triple::ppc64le);
LongDoubleWidth = LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble;
}
/// \brief Flags for architecture specific defines.
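///
/// These flags are ORed together by getTargetDefines(); each newer CPU
/// entry below also carries the define flags of the architectures it
/// subsumes.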
typedef enum {
ArchDefineNone = 0,
ArchDefineName = 1 << 0, // <name> is substituted for arch name.
ArchDefinePpcgr = 1 << 1,
ArchDefinePpcsq = 1 << 2,
ArchDefine440 = 1 << 3,
ArchDefine603 = 1 << 4,
ArchDefine604 = 1 << 5,
ArchDefinePwr4 = 1 << 6,
ArchDefinePwr5 = 1 << 7,
ArchDefinePwr5x = 1 << 8,
ArchDefinePwr6 = 1 << 9,
ArchDefinePwr6x = 1 << 10,
ArchDefinePwr7 = 1 << 11,
ArchDefineA2 = 1 << 12,
ArchDefineA2q = 1 << 13
} ArchDefineTypes;
// Note: GCC recognizes the following additional cpus:
// 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801,
// 821, 823, 8540, 8548, e300c2, e300c3, e500mc64, e6500, 860, cell,
// titan, rs64.
virtual bool setCPU(const std::string &Name) {
bool CPUKnown = llvm::StringSwitch<bool>(Name)
.Case("generic", true)
.Case("440", true)
.Case("450", true)
.Case("601", true)
.Case("602", true)
.Case("603", true)
.Case("603e", true)
.Case("603ev", true)
.Case("604", true)
.Case("604e", true)
.Case("620", true)
.Case("630", true)
.Case("g3", true)
.Case("7400", true)
.Case("g4", true)
.Case("7450", true)
.Case("g4+", true)
.Case("750", true)
.Case("970", true)
.Case("g5", true)
.Case("a2", true)
.Case("a2q", true)
.Case("e500mc", true)
.Case("e5500", true)
.Case("power3", true)
.Case("pwr3", true)
.Case("power4", true)
.Case("pwr4", true)
.Case("power5", true)
.Case("pwr5", true)
.Case("power5x", true)
.Case("pwr5x", true)
.Case("power6", true)
.Case("pwr6", true)
.Case("power6x", true)
.Case("pwr6x", true)
.Case("power7", true)
.Case("pwr7", true)
.Case("powerpc", true)
.Case("ppc", true)
.Case("powerpc64", true)
.Case("ppc64", true)
.Case("powerpc64le", true)
.Case("ppc64le", true)
.Default(false);
if (CPUKnown)
CPU = Name;
return CPUKnown;
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::PPC::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool isCLZForZeroUndef() const { return false; }
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const;
virtual void getDefaultFeatures(llvm::StringMap<bool> &Features) const;
virtual bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags);
virtual bool hasFeature(StringRef Feature) const;
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default: return false;
case 'O': // Zero
break;
case 'b': // Base register
case 'f': // Floating point register
Info.setAllowsRegister();
break;
// FIXME: The following are added to allow parsing.
// I just took a guess at what the actions should be.
// Also, is more specific checking needed? I.e. specific registers?
case 'd': // Floating point register (containing 64-bit value)
case 'v': // Altivec vector register
Info.setAllowsRegister();
break;
case 'w':
switch (Name[1]) {
case 'd':// VSX vector register to hold vector double data
case 'f':// VSX vector register to hold vector float data
case 's':// VSX vector register to hold scalar float data
case 'a':// Any VSX register
break;
default:
return false;
}
Info.setAllowsRegister();
Name++; // Skip over 'w'.
break;
case 'h': // `MQ', `CTR', or `LINK' register
case 'q': // `MQ' register
case 'c': // `CTR' register
case 'l': // `LINK' register
case 'x': // `CR' register (condition register) number 0
case 'y': // `CR' register (condition register)
case 'z': // `XER[CA]' carry bit (part of the XER register)
Info.setAllowsRegister();
break;
case 'I': // Signed 16-bit constant
case 'J': // Unsigned 16-bit constant shifted left 16 bits
// (use `L' instead for SImode constants)
case 'K': // Unsigned 16-bit constant
case 'L': // Signed 16-bit constant shifted left 16 bits
case 'M': // Constant larger than 31
case 'N': // Exact power of 2
case 'P': // Constant whose negation is a signed 16-bit constant
case 'G': // Floating point constant that can be loaded into a
// register with one instruction per word
case 'H': // Integer/Floating point constant that can be loaded
// into a register using three instructions
break;
case 'm': // Memory operand. Note that on PowerPC targets, m can
// include addresses that update the base register. It
// is therefore only safe to use `m' in an asm statement
// if that asm statement accesses the operand exactly once.
// The asm statement must also use `%U<opno>' as a
// placeholder for the "update" flag in the corresponding
// load or store instruction. For example:
// asm ("st%U0 %1,%0" : "=m" (mem) : "r" (val));
// is correct but:
// asm ("st %1,%0" : "=m" (mem) : "r" (val));
// is not. Use es rather than m if you don't want the base
// register to be updated.
case 'e':
if (Name[1] != 's')
return false;
// es: A "stable" memory operand; that is, one which does not
// include any automodification of the base register. Unlike
// `m', this constraint can be used in asm statements that
// might access the operand several times, or that might not
// access it at all.
Info.setAllowsMemory();
Name++; // Skip over 'e'.
break;
case 'Q': // Memory operand that is an offset from a register (it is
// usually better to use `m' or `es' in asm statements)
case 'Z': // Memory operand that is an indexed or indirect from a
// register (it is usually better to use `m' or `es' in
// asm statements)
Info.setAllowsMemory();
Info.setAllowsRegister();
break;
case 'R': // AIX TOC entry
case 'a': // Address operand that is an indexed or indirect from a
// register (`p' is preferable for asm statements)
case 'S': // Constant suitable as a 64-bit mask operand
case 'T': // Constant suitable as a 32-bit mask operand
case 'U': // System V Release 4 small data area reference
case 't': // AND masks that can be performed by two rldic{l, r}
// instructions
case 'W': // Vector constant that does not require memory
case 'j': // Vector constant that is all zeros.
break;
// End FIXME.
}
return true;
}
virtual const char *getClobbers() const {
return "";
}
int getEHDataRegisterNumber(unsigned RegNo) const {
if (RegNo == 0) return 3;
if (RegNo == 1) return 4;
return -1;
}
};
const Builtin::Info PPCTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsPPC.def"
};
/// handleTargetFeatures - Perform initialization based on the
/// user-configured set of features.
bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
// Walk the enabled features; currently we only track VSX.
for (unsigned i = 0, e = Features.size(); i != e; ++i) {
// Ignore disabled features.
if (Features[i][0] == '-')
continue;
StringRef Feature = StringRef(Features[i]).substr(1);
if (Feature == "vsx") {
HasVSX = true;
continue;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
}
return true;
}
/// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific
/// #defines that are not tied to a specific subtarget.
void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Target identification.
Builder.defineMacro("__ppc__");
Builder.defineMacro("__PPC__");
Builder.defineMacro("_ARCH_PPC");
Builder.defineMacro("__powerpc__");
Builder.defineMacro("__POWERPC__");
if (PointerWidth == 64) {
Builder.defineMacro("_ARCH_PPC64");
Builder.defineMacro("__powerpc64__");
Builder.defineMacro("__ppc64__");
Builder.defineMacro("__PPC64__");
}
// Target properties.
if (getTriple().getArch() == llvm::Triple::ppc64le) {
Builder.defineMacro("_LITTLE_ENDIAN");
Builder.defineMacro("__LITTLE_ENDIAN__");
} else {
if (getTriple().getOS() != llvm::Triple::NetBSD &&
getTriple().getOS() != llvm::Triple::OpenBSD)
Builder.defineMacro("_BIG_ENDIAN");
Builder.defineMacro("__BIG_ENDIAN__");
}
// Subtarget options.
Builder.defineMacro("__NATURAL_ALIGNMENT__");
Builder.defineMacro("__REGISTER_PREFIX__", "");
// FIXME: Should be controlled by command line option.
if (LongDoubleWidth == 128)
Builder.defineMacro("__LONG_DOUBLE_128__");
if (Opts.AltiVec) {
Builder.defineMacro("__VEC__", "10206");
Builder.defineMacro("__ALTIVEC__");
}
// CPU identification.
ArchDefineTypes defs = (ArchDefineTypes)llvm::StringSwitch<int>(CPU)
.Case("440", ArchDefineName)
.Case("450", ArchDefineName | ArchDefine440)
.Case("601", ArchDefineName)
.Case("602", ArchDefineName | ArchDefinePpcgr)
.Case("603", ArchDefineName | ArchDefinePpcgr)
.Case("603e", ArchDefineName | ArchDefine603 | ArchDefinePpcgr)
.Case("603ev", ArchDefineName | ArchDefine603 | ArchDefinePpcgr)
.Case("604", ArchDefineName | ArchDefinePpcgr)
.Case("604e", ArchDefineName | ArchDefine604 | ArchDefinePpcgr)
.Case("620", ArchDefineName | ArchDefinePpcgr)
.Case("630", ArchDefineName | ArchDefinePpcgr)
.Case("7400", ArchDefineName | ArchDefinePpcgr)
.Case("7450", ArchDefineName | ArchDefinePpcgr)
.Case("750", ArchDefineName | ArchDefinePpcgr)
.Case("970", ArchDefineName | ArchDefinePwr4 | ArchDefinePpcgr
| ArchDefinePpcsq)
.Case("a2", ArchDefineA2)
.Case("a2q", ArchDefineName | ArchDefineA2 | ArchDefineA2q)
.Case("pwr3", ArchDefinePpcgr)
.Case("pwr4", ArchDefineName | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("pwr5", ArchDefineName | ArchDefinePwr4 | ArchDefinePpcgr
| ArchDefinePpcsq)
.Case("pwr5x", ArchDefineName | ArchDefinePwr5 | ArchDefinePwr4
| ArchDefinePpcgr | ArchDefinePpcsq)
.Case("pwr6", ArchDefineName | ArchDefinePwr5x | ArchDefinePwr5
| ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("pwr6x", ArchDefineName | ArchDefinePwr6 | ArchDefinePwr5x
| ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr
| ArchDefinePpcsq)
.Case("pwr7", ArchDefineName | ArchDefinePwr6x | ArchDefinePwr6
| ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4
| ArchDefinePwr6 | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("power3", ArchDefinePpcgr)
.Case("power4", ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("power5", ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr
| ArchDefinePpcsq)
.Case("power5x", ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4
| ArchDefinePpcgr | ArchDefinePpcsq)
.Case("power6", ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5
| ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("power6x", ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x
| ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr
| ArchDefinePpcsq)
.Case("power7", ArchDefinePwr7 | ArchDefinePwr6x | ArchDefinePwr6
| ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4
| ArchDefinePwr6 | ArchDefinePpcgr | ArchDefinePpcsq)
.Default(ArchDefineNone);
if (defs & ArchDefineName)
Builder.defineMacro(Twine("_ARCH_", StringRef(CPU).upper()));
if (defs & ArchDefinePpcgr)
Builder.defineMacro("_ARCH_PPCGR");
if (defs & ArchDefinePpcsq)
Builder.defineMacro("_ARCH_PPCSQ");
if (defs & ArchDefine440)
Builder.defineMacro("_ARCH_440");
if (defs & ArchDefine603)
Builder.defineMacro("_ARCH_603");
if (defs & ArchDefine604)
Builder.defineMacro("_ARCH_604");
if (defs & ArchDefinePwr4)
Builder.defineMacro("_ARCH_PWR4");
if (defs & ArchDefinePwr5)
Builder.defineMacro("_ARCH_PWR5");
if (defs & ArchDefinePwr5x)
Builder.defineMacro("_ARCH_PWR5X");
if (defs & ArchDefinePwr6)
Builder.defineMacro("_ARCH_PWR6");
if (defs & ArchDefinePwr6x)
Builder.defineMacro("_ARCH_PWR6X");
if (defs & ArchDefinePwr7)
Builder.defineMacro("_ARCH_PWR7");
if (defs & ArchDefineA2)
Builder.defineMacro("_ARCH_A2");
if (defs & ArchDefineA2q) {
Builder.defineMacro("_ARCH_A2Q");
Builder.defineMacro("_ARCH_QP");
}
if (getTriple().getVendor() == llvm::Triple::BGQ) {
Builder.defineMacro("__bg__");
Builder.defineMacro("__THW_BLUEGENE__");
Builder.defineMacro("__bgq__");
Builder.defineMacro("__TOS_BGQ__");
}
if (HasVSX)
Builder.defineMacro("__VSX__");
// FIXME: The following are not yet generated here by Clang, but are
// generated by GCC:
//
// _SOFT_FLOAT_
// __RECIP_PRECISION__
// __APPLE_ALTIVEC__
// __RECIP__
// __RECIPF__
// __RSQRTE__
// __RSQRTEF__
// _SOFT_DOUBLE_
// __NO_LWSYNC__
// __HAVE_BSWAP__
// __LONGDOUBLE128
// __CMODEL_MEDIUM__
// __CMODEL_LARGE__
// _CALL_SYSV
// _CALL_DARWIN
// __NO_FPRS__
}
void PPCTargetInfo::getDefaultFeatures(llvm::StringMap<bool> &Features) const {
Features["altivec"] = llvm::StringSwitch<bool>(CPU)
.Case("7400", true)
.Case("g4", true)
.Case("7450", true)
.Case("g4+", true)
.Case("970", true)
.Case("g5", true)
.Case("pwr6", true)
.Case("pwr7", true)
.Case("ppc64", true)
.Case("ppc64le", true)
.Default(false);
Features["qpx"] = (CPU == "a2q");
}
bool PPCTargetInfo::hasFeature(StringRef Feature) const {
return Feature == "powerpc";
}
const char * const PPCTargetInfo::GCCRegNames[] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
"mq", "lr", "ctr", "ap",
"cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",
"xer",
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
"vrsave", "vscr",
"spe_acc", "spefscr",
"sfp"
};
void PPCTargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
const TargetInfo::GCCRegAlias PPCTargetInfo::GCCRegAliases[] = {
// While some of these aliases do map to different registers
// they still share the same register name.
{ { "0" }, "r0" },
{ { "1"}, "r1" },
{ { "2" }, "r2" },
{ { "3" }, "r3" },
{ { "4" }, "r4" },
{ { "5" }, "r5" },
{ { "6" }, "r6" },
{ { "7" }, "r7" },
{ { "8" }, "r8" },
{ { "9" }, "r9" },
{ { "10" }, "r10" },
{ { "11" }, "r11" },
{ { "12" }, "r12" },
{ { "13" }, "r13" },
{ { "14" }, "r14" },
{ { "15" }, "r15" },
{ { "16" }, "r16" },
{ { "17" }, "r17" },
{ { "18" }, "r18" },
{ { "19" }, "r19" },
{ { "20" }, "r20" },
{ { "21" }, "r21" },
{ { "22" }, "r22" },
{ { "23" }, "r23" },
{ { "24" }, "r24" },
{ { "25" }, "r25" },
{ { "26" }, "r26" },
{ { "27" }, "r27" },
{ { "28" }, "r28" },
{ { "29" }, "r29" },
{ { "30" }, "r30" },
{ { "31" }, "r31" },
{ { "fr0" }, "f0" },
{ { "fr1" }, "f1" },
{ { "fr2" }, "f2" },
{ { "fr3" }, "f3" },
{ { "fr4" }, "f4" },
{ { "fr5" }, "f5" },
{ { "fr6" }, "f6" },
{ { "fr7" }, "f7" },
{ { "fr8" }, "f8" },
{ { "fr9" }, "f9" },
{ { "fr10" }, "f10" },
{ { "fr11" }, "f11" },
{ { "fr12" }, "f12" },
{ { "fr13" }, "f13" },
{ { "fr14" }, "f14" },
{ { "fr15" }, "f15" },
{ { "fr16" }, "f16" },
{ { "fr17" }, "f17" },
{ { "fr18" }, "f18" },
{ { "fr19" }, "f19" },
{ { "fr20" }, "f20" },
{ { "fr21" }, "f21" },
{ { "fr22" }, "f22" },
{ { "fr23" }, "f23" },
{ { "fr24" }, "f24" },
{ { "fr25" }, "f25" },
{ { "fr26" }, "f26" },
{ { "fr27" }, "f27" },
{ { "fr28" }, "f28" },
{ { "fr29" }, "f29" },
{ { "fr30" }, "f30" },
{ { "fr31" }, "f31" },
{ { "cc" }, "cr0" },
};
void PPCTargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
} // end anonymous namespace.
namespace {
class PPC32TargetInfo : public PPCTargetInfo {
public:
PPC32TargetInfo(const llvm::Triple &Triple) : PPCTargetInfo(Triple) {
DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32";
switch (getTriple().getOS()) {
case llvm::Triple::Linux:
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
break;
default:
break;
}
if (getTriple().getOS() == llvm::Triple::FreeBSD) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
}
// PPC32 supports atomics up to 4 bytes.
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
// This is the ELF definition, and is overridden by the Darwin sub-target
return TargetInfo::PowerABIBuiltinVaList;
}
};
} // end anonymous namespace.
// Note: ABI differences may eventually require us to have a separate
// TargetInfo for little endian.
namespace {
class PPC64TargetInfo : public PPCTargetInfo {
public:
PPC64TargetInfo(const llvm::Triple &Triple) : PPCTargetInfo(Triple) {
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
IntMaxType = SignedLong;
UIntMaxType = UnsignedLong;
Int64Type = SignedLong;
if (getTriple().getOS() == llvm::Triple::FreeBSD) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
DescriptionString = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-"
"v128:128:128-n32:64";
} else
DescriptionString = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-f128:128:128-"
"v128:128:128-n32:64";
// PPC64 supports atomics up to 8 bytes.
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::CharPtrBuiltinVaList;
}
};
} // end anonymous namespace.
namespace {
class DarwinPPC32TargetInfo :
public DarwinTargetInfo<PPC32TargetInfo> {
public:
DarwinPPC32TargetInfo(const llvm::Triple &Triple)
: DarwinTargetInfo<PPC32TargetInfo>(Triple) {
HasAlignMac68kSupport = true;
BoolWidth = BoolAlign = 32; //XXX support -mone-byte-bool?
PtrDiffType = SignedInt; // for http://llvm.org/bugs/show_bug.cgi?id=15726
LongLongAlign = 32;
SuitableAlign = 128;
DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
- "i64:32:64-f32:32:32-f64:64:64-v128:128:128-n32";
+ "i64:32:64-f32:32:32-f64:32:64-v128:128:128-n32";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::CharPtrBuiltinVaList;
}
};
class DarwinPPC64TargetInfo :
public DarwinTargetInfo<PPC64TargetInfo> {
public:
DarwinPPC64TargetInfo(const llvm::Triple &Triple)
: DarwinTargetInfo<PPC64TargetInfo>(Triple) {
HasAlignMac68kSupport = true;
SuitableAlign = 128;
DescriptionString = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64";
}
};
} // end anonymous namespace.
namespace {
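// Language address spaces mapped to NVPTX address space numbers:
// 1 = global, 3 = shared, 4 = constant.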
static const unsigned NVPTXAddrSpaceMap[] = {
1, // opencl_global
3, // opencl_local
4, // opencl_constant
1, // cuda_device
4, // cuda_constant
3, // cuda_shared
};
class NVPTXTargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const Builtin::Info BuiltinInfo[];
public:
NVPTXTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
TLSSupported = false;
LongWidth = LongAlign = 64;
AddrSpaceMap = &NVPTXAddrSpaceMap;
UseAddrSpaceMapMangling = true;
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::NVPTX::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool hasFeature(StringRef Feature) const {
return Feature == "ptx" || Feature == "nvptx";
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
// No aliases.
Aliases = 0;
NumAliases = 0;
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default: return false;
case 'c':
case 'h':
case 'r':
case 'l':
case 'f':
case 'd':
Info.setAllowsRegister();
return true;
}
}
virtual const char *getClobbers() const {
// FIXME: Is this really right?
return "";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
// FIXME: implement
return TargetInfo::CharPtrBuiltinVaList;
}
virtual bool setCPU(const std::string &Name) {
bool Valid = llvm::StringSwitch<bool>(Name)
.Case("sm_20", true)
.Case("sm_21", true)
.Case("sm_30", true)
.Case("sm_35", true)
.Default(false);
return Valid;
}
};
const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsNVPTX.def"
};
const char * const NVPTXTargetInfo::GCCRegNames[] = {
"r0"
};
void NVPTXTargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
class NVPTX32TargetInfo : public NVPTXTargetInfo {
public:
NVPTX32TargetInfo(const llvm::Triple &Triple) : NVPTXTargetInfo(Triple) {
PointerWidth = PointerAlign = 32;
SizeType = PtrDiffType = IntPtrType = TargetInfo::UnsignedInt;
DescriptionString
= "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
"f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
"n16:32:64";
}
};
class NVPTX64TargetInfo : public NVPTXTargetInfo {
public:
NVPTX64TargetInfo(const llvm::Triple &Triple) : NVPTXTargetInfo(Triple) {
PointerWidth = PointerAlign = 64;
SizeType = PtrDiffType = IntPtrType = TargetInfo::UnsignedLongLong;
DescriptionString
= "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
"f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
"n16:32:64";
}
};
}
namespace {
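// Language address spaces mapped to R600 address space numbers:
// 1 = global, 2 = constant, 3 = local.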
static const unsigned R600AddrSpaceMap[] = {
1, // opencl_global
3, // opencl_local
2, // opencl_constant
1, // cuda_device
2, // cuda_constant
3 // cuda_shared
};
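// Data layout string key: 'e' = little-endian, 'p[n]:s:a' = pointer size
// and alignment (optionally per address space), 'iN'/'fN'/'vN' = ABI and
// preferred alignment for integer/float/vector types, 'nN:...' = native
// integer widths.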
static const char *DescriptionStringR600 =
"e"
"-p:32:32:32"
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
"-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n32:64";
static const char *DescriptionStringR600DoubleOps =
"e"
"-p:32:32:32"
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
"-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n32:64";
static const char *DescriptionStringSI =
"e"
"-p:64:64:64"
"-p3:32:32:32"
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
"-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n32:64";
class R600TargetInfo : public TargetInfo {
/// \brief The GPU profiles supported by the R600 target.
enum GPUKind {
GK_NONE,
GK_R600,
GK_R600_DOUBLE_OPS,
GK_R700,
GK_R700_DOUBLE_OPS,
GK_EVERGREEN,
GK_EVERGREEN_DOUBLE_OPS,
GK_NORTHERN_ISLANDS,
GK_CAYMAN,
GK_SOUTHERN_ISLANDS,
GK_SEA_ISLANDS
} GPU;
public:
R600TargetInfo(const llvm::Triple &Triple)
: TargetInfo(Triple), GPU(GK_R600) {
DescriptionString = DescriptionStringR600;
AddrSpaceMap = &R600AddrSpaceMap;
UseAddrSpaceMapMangling = true;
}
virtual const char * getClobbers() const {
return "";
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &numNames) const {
Names = NULL;
numNames = 0;
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = NULL;
NumAliases = 0;
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const {
return true;
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = NULL;
NumRecords = 0;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__R600__");
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::CharPtrBuiltinVaList;
}
virtual bool setCPU(const std::string &Name) {
GPU = llvm::StringSwitch<GPUKind>(Name)
.Case("r600" , GK_R600)
.Case("rv610", GK_R600)
.Case("rv620", GK_R600)
.Case("rv630", GK_R600)
.Case("rv635", GK_R600)
.Case("rs780", GK_R600)
.Case("rs880", GK_R600)
.Case("rv670", GK_R600_DOUBLE_OPS)
.Case("rv710", GK_R700)
.Case("rv730", GK_R700)
.Case("rv740", GK_R700_DOUBLE_OPS)
.Case("rv770", GK_R700_DOUBLE_OPS)
.Case("palm", GK_EVERGREEN)
.Case("cedar", GK_EVERGREEN)
.Case("sumo", GK_EVERGREEN)
.Case("sumo2", GK_EVERGREEN)
.Case("redwood", GK_EVERGREEN)
.Case("juniper", GK_EVERGREEN)
.Case("hemlock", GK_EVERGREEN_DOUBLE_OPS)
.Case("cypress", GK_EVERGREEN_DOUBLE_OPS)
.Case("barts", GK_NORTHERN_ISLANDS)
.Case("turks", GK_NORTHERN_ISLANDS)
.Case("caicos", GK_NORTHERN_ISLANDS)
.Case("cayman", GK_CAYMAN)
.Case("aruba", GK_CAYMAN)
.Case("tahiti", GK_SOUTHERN_ISLANDS)
.Case("pitcairn", GK_SOUTHERN_ISLANDS)
.Case("verde", GK_SOUTHERN_ISLANDS)
.Case("oland", GK_SOUTHERN_ISLANDS)
.Case("bonaire", GK_SEA_ISLANDS)
.Case("kabini", GK_SEA_ISLANDS)
.Case("kaveri", GK_SEA_ISLANDS)
.Case("hawaii", GK_SEA_ISLANDS)
.Default(GK_NONE);
if (GPU == GK_NONE) {
return false;
}
// Set the correct data layout
switch (GPU) {
case GK_NONE:
case GK_R600:
case GK_R700:
case GK_EVERGREEN:
case GK_NORTHERN_ISLANDS:
DescriptionString = DescriptionStringR600;
break;
case GK_R600_DOUBLE_OPS:
case GK_R700_DOUBLE_OPS:
case GK_EVERGREEN_DOUBLE_OPS:
case GK_CAYMAN:
DescriptionString = DescriptionStringR600DoubleOps;
break;
case GK_SOUTHERN_ISLANDS:
case GK_SEA_ISLANDS:
DescriptionString = DescriptionStringSI;
break;
}
return true;
}
};
} // end anonymous namespace
namespace {
// Namespace for x86 abstract base class
const Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsX86.def"
};
static const char* const GCCRegNames[] = {
"ax", "dx", "cx", "bx", "si", "di", "bp", "sp",
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
"argp", "flags", "fpcr", "fpsr", "dirflag", "frame",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
};
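// Each alias set maps onto the index of its canonical register in
// GCCRegNames above, e.g. "al"/"ah"/"eax"/"rax" all name register 0, "ax".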
const TargetInfo::AddlRegName AddlRegNames[] = {
{ { "al", "ah", "eax", "rax" }, 0 },
{ { "bl", "bh", "ebx", "rbx" }, 3 },
{ { "cl", "ch", "ecx", "rcx" }, 2 },
{ { "dl", "dh", "edx", "rdx" }, 1 },
{ { "esi", "rsi" }, 4 },
{ { "edi", "rdi" }, 5 },
{ { "esp", "rsp" }, 7 },
{ { "ebp", "rbp" }, 6 },
};
// X86 target abstract base class; x86-32 and x86-64 are very close, so
// most of the implementation can be shared.
class X86TargetInfo : public TargetInfo {
enum X86SSEEnum {
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
} SSELevel;
enum MMX3DNowEnum {
NoMMX3DNow, MMX, AMD3DNow, AMD3DNowAthlon
} MMX3DNowLevel;
enum XOPEnum {
NoXOP,
SSE4A,
FMA4,
XOP
} XOPLevel;
bool HasAES;
bool HasPCLMUL;
bool HasLZCNT;
bool HasRDRND;
bool HasBMI;
bool HasBMI2;
bool HasPOPCNT;
bool HasRTM;
bool HasPRFCHW;
bool HasRDSEED;
bool HasTBM;
bool HasFMA;
bool HasF16C;
bool HasAVX512CD, HasAVX512ER, HasAVX512PF;
bool HasSHA;
bool HasCX16;
/// \brief Enumeration of all of the X86 CPUs supported by Clang.
///
/// Each enumeration represents a particular CPU supported by Clang. These
/// loosely correspond to the options passed to '-march' or '-mtune' flags.
enum CPUKind {
CK_Generic,
/// \name i386
/// i386-generation processors.
//@{
CK_i386,
//@}
/// \name i486
/// i486-generation processors.
//@{
CK_i486,
CK_WinChipC6,
CK_WinChip2,
CK_C3,
//@}
/// \name i586
/// i586-generation processors, P5 microarchitecture based.
//@{
CK_i586,
CK_Pentium,
CK_PentiumMMX,
//@}
/// \name i686
/// i686-generation processors, P6 / Pentium M microarchitecture based.
//@{
CK_i686,
CK_PentiumPro,
CK_Pentium2,
CK_Pentium3,
CK_Pentium3M,
CK_PentiumM,
CK_C3_2,
/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah.
/// Clang however has some logic to support this.
// FIXME: Warn, deprecate, and potentially remove this.
CK_Yonah,
//@}
/// \name Netburst
/// Netburst microarchitecture based processors.
//@{
CK_Pentium4,
CK_Pentium4M,
CK_Prescott,
CK_Nocona,
//@}
/// \name Core
/// Core microarchitecture based processors.
//@{
CK_Core2,
/// This enumerator, like \see CK_Yonah, is a bit odd. It is another
/// codename which GCC no longer accepts as an option to -march, but Clang
/// has some logic for recognizing it.
// FIXME: Warn, deprecate, and potentially remove this.
CK_Penryn,
//@}
/// \name Atom
/// Atom processors.
//@{
CK_Atom,
CK_Silvermont,
//@}
/// \name Nehalem
/// Nehalem microarchitecture based processors.
//@{
CK_Corei7,
CK_Corei7AVX,
CK_CoreAVXi,
CK_CoreAVX2,
//@}
/// \name Knights Landing
/// Knights Landing processor.
CK_KNL,
/// \name K6
/// K6 architecture processors.
//@{
CK_K6,
CK_K6_2,
CK_K6_3,
//@}
/// \name K7
/// K7 architecture processors.
//@{
CK_Athlon,
CK_AthlonThunderbird,
CK_Athlon4,
CK_AthlonXP,
CK_AthlonMP,
//@}
/// \name K8
/// K8 architecture processors.
//@{
CK_Athlon64,
CK_Athlon64SSE3,
CK_AthlonFX,
CK_K8,
CK_K8SSE3,
CK_Opteron,
CK_OpteronSSE3,
CK_AMDFAM10,
//@}
/// \name Bobcat
/// Bobcat architecture processors.
//@{
CK_BTVER1,
CK_BTVER2,
//@}
/// \name Bulldozer
/// Bulldozer architecture processors.
//@{
CK_BDVER1,
CK_BDVER2,
CK_BDVER3,
//@}
/// This specification is deprecated and will be removed in the future.
/// Users should prefer \see CK_K8.
// FIXME: Warn on this when the CPU is set to it.
CK_x86_64,
//@}
/// \name Geode
/// Geode processors.
//@{
CK_Geode
//@}
} CPU;
enum FPMathKind {
FP_Default,
FP_SSE,
FP_387
} FPMath;
public:
X86TargetInfo(const llvm::Triple &Triple)
: TargetInfo(Triple), SSELevel(NoSSE), MMX3DNowLevel(NoMMX3DNow),
XOPLevel(NoXOP), HasAES(false), HasPCLMUL(false), HasLZCNT(false),
HasRDRND(false), HasBMI(false), HasBMI2(false), HasPOPCNT(false),
HasRTM(false), HasPRFCHW(false), HasRDSEED(false), HasTBM(false),
HasFMA(false), HasF16C(false), HasAVX512CD(false), HasAVX512ER(false),
HasAVX512PF(false), HasSHA(false), HasCX16(false), CPU(CK_Generic),
FPMath(FP_Default) {
BigEndian = false;
LongDoubleFormat = &llvm::APFloat::x87DoubleExtended;
}
virtual unsigned getFloatEvalMethod() const {
// X87 evaluates with 80 bits "long double" precision.
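// The result follows C99 FLT_EVAL_METHOD: 2 evaluates all operations in
// long double precision, 0 evaluates each operation in its own type.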
return SSELevel == NoSSE ? 2 : 0;
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::X86::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = 0;
NumAliases = 0;
}
virtual void getGCCAddlRegNames(const AddlRegName *&Names,
unsigned &NumNames) const {
Names = AddlRegNames;
NumNames = llvm::array_lengthof(AddlRegNames);
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const;
virtual std::string convertConstraint(const char *&Constraint) const;
virtual const char *getClobbers() const {
return "~{dirflag},~{fpsr},~{flags}";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const;
static void setSSELevel(llvm::StringMap<bool> &Features, X86SSEEnum Level,
bool Enabled);
static void setMMXLevel(llvm::StringMap<bool> &Features, MMX3DNowEnum Level,
bool Enabled);
static void setXOPLevel(llvm::StringMap<bool> &Features, XOPEnum Level,
bool Enabled);
virtual void setFeatureEnabled(llvm::StringMap<bool> &Features,
StringRef Name, bool Enabled) const {
setFeatureEnabledImpl(Features, Name, Enabled);
}
// This exists purely to cut down on the number of virtual calls in
// getDefaultFeatures which calls this repeatedly.
static void setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
StringRef Name, bool Enabled);
virtual void getDefaultFeatures(llvm::StringMap<bool> &Features) const;
virtual bool hasFeature(StringRef Feature) const;
virtual bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags);
virtual const char* getABI() const {
if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
return "avx";
else if (getTriple().getArch() == llvm::Triple::x86 &&
MMX3DNowLevel == NoMMX3DNow)
return "no-mmx";
return "";
}
virtual bool setCPU(const std::string &Name) {
CPU = llvm::StringSwitch<CPUKind>(Name)
.Case("i386", CK_i386)
.Case("i486", CK_i486)
.Case("winchip-c6", CK_WinChipC6)
.Case("winchip2", CK_WinChip2)
.Case("c3", CK_C3)
.Case("i586", CK_i586)
.Case("pentium", CK_Pentium)
.Case("pentium-mmx", CK_PentiumMMX)
.Case("i686", CK_i686)
.Case("pentiumpro", CK_PentiumPro)
.Case("pentium2", CK_Pentium2)
.Case("pentium3", CK_Pentium3)
.Case("pentium3m", CK_Pentium3M)
.Case("pentium-m", CK_PentiumM)
.Case("c3-2", CK_C3_2)
.Case("yonah", CK_Yonah)
.Case("pentium4", CK_Pentium4)
.Case("pentium4m", CK_Pentium4M)
.Case("prescott", CK_Prescott)
.Case("nocona", CK_Nocona)
.Case("core2", CK_Core2)
.Case("penryn", CK_Penryn)
.Case("atom", CK_Atom)
.Case("slm", CK_Silvermont)
.Case("corei7", CK_Corei7)
.Case("corei7-avx", CK_Corei7AVX)
.Case("core-avx-i", CK_CoreAVXi)
.Case("core-avx2", CK_CoreAVX2)
.Case("knl", CK_KNL)
.Case("k6", CK_K6)
.Case("k6-2", CK_K6_2)
.Case("k6-3", CK_K6_3)
.Case("athlon", CK_Athlon)
.Case("athlon-tbird", CK_AthlonThunderbird)
.Case("athlon-4", CK_Athlon4)
.Case("athlon-xp", CK_AthlonXP)
.Case("athlon-mp", CK_AthlonMP)
.Case("athlon64", CK_Athlon64)
.Case("athlon64-sse3", CK_Athlon64SSE3)
.Case("athlon-fx", CK_AthlonFX)
.Case("k8", CK_K8)
.Case("k8-sse3", CK_K8SSE3)
.Case("opteron", CK_Opteron)
.Case("opteron-sse3", CK_OpteronSSE3)
.Case("amdfam10", CK_AMDFAM10)
.Case("btver1", CK_BTVER1)
.Case("btver2", CK_BTVER2)
.Case("bdver1", CK_BDVER1)
.Case("bdver2", CK_BDVER2)
.Case("bdver3", CK_BDVER3)
.Case("x86-64", CK_x86_64)
.Case("geode", CK_Geode)
.Default(CK_Generic);
// Perform any per-CPU checks necessary to determine if this CPU is
// acceptable.
// FIXME: This results in terrible diagnostics. Clang just says the CPU is
// invalid without explaining *why*.
switch (CPU) {
case CK_Generic:
// No processor selected!
return false;
case CK_i386:
case CK_i486:
case CK_WinChipC6:
case CK_WinChip2:
case CK_C3:
case CK_i586:
case CK_Pentium:
case CK_PentiumMMX:
case CK_i686:
case CK_PentiumPro:
case CK_Pentium2:
case CK_Pentium3:
case CK_Pentium3M:
case CK_PentiumM:
case CK_Yonah:
case CK_C3_2:
case CK_Pentium4:
case CK_Pentium4M:
case CK_Prescott:
case CK_K6:
case CK_K6_2:
case CK_K6_3:
case CK_Athlon:
case CK_AthlonThunderbird:
case CK_Athlon4:
case CK_AthlonXP:
case CK_AthlonMP:
case CK_Geode:
// Only accept certain architectures when compiling in 32-bit mode.
if (getTriple().getArch() != llvm::Triple::x86)
return false;
// Fallthrough
case CK_Nocona:
case CK_Core2:
case CK_Penryn:
case CK_Atom:
case CK_Silvermont:
case CK_Corei7:
case CK_Corei7AVX:
case CK_CoreAVXi:
case CK_CoreAVX2:
case CK_KNL:
case CK_Athlon64:
case CK_Athlon64SSE3:
case CK_AthlonFX:
case CK_K8:
case CK_K8SSE3:
case CK_Opteron:
case CK_OpteronSSE3:
case CK_AMDFAM10:
case CK_BTVER1:
case CK_BTVER2:
case CK_BDVER1:
case CK_BDVER2:
case CK_BDVER3:
case CK_x86_64:
return true;
}
llvm_unreachable("Unhandled CPU kind");
}
virtual bool setFPMath(StringRef Name);
virtual CallingConvCheckResult checkCallingConvention(CallingConv CC) const {
// We accept all non-ARM calling conventions
return (CC == CC_X86ThisCall ||
CC == CC_X86FastCall ||
CC == CC_X86StdCall ||
CC == CC_C ||
CC == CC_X86Pascal ||
CC == CC_IntelOclBicc) ? CCCR_OK : CCCR_Warning;
}
virtual CallingConv getDefaultCallingConv(CallingConvMethodType MT) const {
return MT == CCMT_Member ? CC_X86ThisCall : CC_C;
}
};
bool X86TargetInfo::setFPMath(StringRef Name) {
if (Name == "387") {
FPMath = FP_387;
return true;
}
if (Name == "sse") {
FPMath = FP_SSE;
return true;
}
return false;
}
void X86TargetInfo::getDefaultFeatures(llvm::StringMap<bool> &Features) const {
// FIXME: This *really* should not be here.
// X86_64 always has SSE2.
if (getTriple().getArch() == llvm::Triple::x86_64)
setFeatureEnabledImpl(Features, "sse2", true);
switch (CPU) {
case CK_Generic:
case CK_i386:
case CK_i486:
case CK_i586:
case CK_Pentium:
case CK_i686:
case CK_PentiumPro:
break;
case CK_PentiumMMX:
case CK_Pentium2:
setFeatureEnabledImpl(Features, "mmx", true);
break;
case CK_Pentium3:
case CK_Pentium3M:
setFeatureEnabledImpl(Features, "sse", true);
break;
case CK_PentiumM:
case CK_Pentium4:
case CK_Pentium4M:
case CK_x86_64:
setFeatureEnabledImpl(Features, "sse2", true);
break;
case CK_Yonah:
case CK_Prescott:
case CK_Nocona:
setFeatureEnabledImpl(Features, "sse3", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_Core2:
setFeatureEnabledImpl(Features, "ssse3", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_Penryn:
setFeatureEnabledImpl(Features, "sse4.1", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_Atom:
setFeatureEnabledImpl(Features, "ssse3", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_Silvermont:
setFeatureEnabledImpl(Features, "sse4.2", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "cx16", true);
setFeatureEnabledImpl(Features, "pclmul", true);
break;
case CK_Corei7:
setFeatureEnabledImpl(Features, "sse4.2", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_Corei7AVX:
setFeatureEnabledImpl(Features, "avx", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "cx16", true);
setFeatureEnabledImpl(Features, "pclmul", true);
break;
case CK_CoreAVXi:
setFeatureEnabledImpl(Features, "avx", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "rdrnd", true);
setFeatureEnabledImpl(Features, "f16c", true);
break;
case CK_CoreAVX2:
setFeatureEnabledImpl(Features, "avx2", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "rdrnd", true);
setFeatureEnabledImpl(Features, "f16c", true);
setFeatureEnabledImpl(Features, "bmi", true);
setFeatureEnabledImpl(Features, "bmi2", true);
setFeatureEnabledImpl(Features, "rtm", true);
setFeatureEnabledImpl(Features, "fma", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_KNL:
setFeatureEnabledImpl(Features, "avx512f", true);
setFeatureEnabledImpl(Features, "avx512cd", true);
setFeatureEnabledImpl(Features, "avx512er", true);
setFeatureEnabledImpl(Features, "avx512pf", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "rdrnd", true);
setFeatureEnabledImpl(Features, "f16c", true);
setFeatureEnabledImpl(Features, "bmi", true);
setFeatureEnabledImpl(Features, "bmi2", true);
setFeatureEnabledImpl(Features, "rtm", true);
setFeatureEnabledImpl(Features, "fma", true);
break;
case CK_K6:
case CK_WinChipC6:
setFeatureEnabledImpl(Features, "mmx", true);
break;
case CK_K6_2:
case CK_K6_3:
case CK_WinChip2:
case CK_C3:
setFeatureEnabledImpl(Features, "3dnow", true);
break;
case CK_Athlon:
case CK_AthlonThunderbird:
case CK_Geode:
setFeatureEnabledImpl(Features, "3dnowa", true);
break;
case CK_Athlon4:
case CK_AthlonXP:
case CK_AthlonMP:
setFeatureEnabledImpl(Features, "sse", true);
setFeatureEnabledImpl(Features, "3dnowa", true);
break;
case CK_K8:
case CK_Opteron:
case CK_Athlon64:
case CK_AthlonFX:
setFeatureEnabledImpl(Features, "sse2", true);
setFeatureEnabledImpl(Features, "3dnowa", true);
break;
case CK_K8SSE3:
case CK_OpteronSSE3:
case CK_Athlon64SSE3:
setFeatureEnabledImpl(Features, "sse3", true);
setFeatureEnabledImpl(Features, "3dnowa", true);
break;
case CK_AMDFAM10:
setFeatureEnabledImpl(Features, "sse3", true);
setFeatureEnabledImpl(Features, "sse4a", true);
setFeatureEnabledImpl(Features, "3dnowa", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "popcnt", true);
break;
case CK_BTVER1:
setFeatureEnabledImpl(Features, "ssse3", true);
setFeatureEnabledImpl(Features, "sse4a", true);
setFeatureEnabledImpl(Features, "cx16", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "popcnt", true);
setFeatureEnabledImpl(Features, "prfchw", true);
break;
case CK_BTVER2:
setFeatureEnabledImpl(Features, "avx", true);
setFeatureEnabledImpl(Features, "sse4a", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "prfchw", true);
setFeatureEnabledImpl(Features, "bmi", true);
setFeatureEnabledImpl(Features, "f16c", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_BDVER1:
setFeatureEnabledImpl(Features, "xop", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "prfchw", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_BDVER2:
case CK_BDVER3:
setFeatureEnabledImpl(Features, "xop", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "prfchw", true);
setFeatureEnabledImpl(Features, "bmi", true);
setFeatureEnabledImpl(Features, "fma", true);
setFeatureEnabledImpl(Features, "f16c", true);
setFeatureEnabledImpl(Features, "tbm", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_C3_2:
setFeatureEnabledImpl(Features, "sse", true);
break;
}
}
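// The set*Level helpers below encode the implication chains between ISA
// extensions using deliberate switch fallthrough: when enabling a level,
// control falls through every lower case so all implied features are set;
// when disabling, it falls through every higher case so everything that
// depends on the level is cleared too. For example (derived from the cases
// below), setSSELevel(F, AVX, true) also sets sse4.2 down to sse, while
// setSSELevel(F, SSE3, false) also clears ssse3, sse4.x, avx* and, via
// setXOPLevel(F, NoXOP, false), the whole XOP chain.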
void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
X86SSEEnum Level, bool Enabled) {
if (Enabled) {
switch (Level) {
case AVX512F:
Features["avx512f"] = true;
case AVX2:
Features["avx2"] = true;
case AVX:
Features["avx"] = true;
case SSE42:
Features["sse4.2"] = true;
case SSE41:
Features["sse4.1"] = true;
case SSSE3:
Features["ssse3"] = true;
case SSE3:
Features["sse3"] = true;
case SSE2:
Features["sse2"] = true;
case SSE1:
Features["sse"] = true;
case NoSSE:
break;
}
return;
}
switch (Level) {
case NoSSE:
case SSE1:
Features["sse"] = false;
case SSE2:
Features["sse2"] = Features["pclmul"] = Features["aes"] =
Features["sha"] = false;
case SSE3:
Features["sse3"] = false;
setXOPLevel(Features, NoXOP, false);
case SSSE3:
Features["ssse3"] = false;
case SSE41:
Features["sse4.1"] = false;
case SSE42:
Features["sse4.2"] = false;
case AVX:
Features["fma"] = Features["avx"] = Features["f16c"] = false;
setXOPLevel(Features, FMA4, false);
case AVX2:
Features["avx2"] = false;
case AVX512F:
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = false;
}
}
void X86TargetInfo::setMMXLevel(llvm::StringMap<bool> &Features,
MMX3DNowEnum Level, bool Enabled) {
if (Enabled) {
switch (Level) {
case AMD3DNowAthlon:
Features["3dnowa"] = true;
case AMD3DNow:
Features["3dnow"] = true;
case MMX:
Features["mmx"] = true;
case NoMMX3DNow:
break;
}
return;
}
switch (Level) {
case NoMMX3DNow:
case MMX:
Features["mmx"] = false;
case AMD3DNow:
Features["3dnow"] = false;
case AMD3DNowAthlon:
Features["3dnowa"] = false;
}
}
void X86TargetInfo::setXOPLevel(llvm::StringMap<bool> &Features, XOPEnum Level,
bool Enabled) {
if (Enabled) {
switch (Level) {
case XOP:
Features["xop"] = true;
case FMA4:
Features["fma4"] = true;
setSSELevel(Features, AVX, true);
case SSE4A:
Features["sse4a"] = true;
setSSELevel(Features, SSE3, true);
case NoXOP:
break;
}
return;
}
switch (Level) {
case NoXOP:
case SSE4A:
Features["sse4a"] = false;
case FMA4:
Features["fma4"] = false;
case XOP:
Features["xop"] = false;
}
}
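// setFeatureEnabledImpl is the single entry point for toggling a named
// feature; the helpers above then propagate the implications. A sketch of
// the observable effect (hypothetical caller, feature names as used here):
//   llvm::StringMap<bool> F;
//   setFeatureEnabledImpl(F, "sse4", true); // alias, treated as "sse4.2"
//   // F["sse4.2"], F["sse4.1"], F["ssse3"], F["sse3"], F["sse2"] and
//   // F["sse"] are all true afterwards.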
void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
StringRef Name, bool Enabled) {
// FIXME: This *really* should not be here. We need some way of translating
// options into llvm subtarget features.
if (Name == "sse4")
Name = "sse4.2";
Features[Name] = Enabled;
if (Name == "mmx") {
setMMXLevel(Features, MMX, Enabled);
} else if (Name == "sse") {
setSSELevel(Features, SSE1, Enabled);
} else if (Name == "sse2") {
setSSELevel(Features, SSE2, Enabled);
} else if (Name == "sse3") {
setSSELevel(Features, SSE3, Enabled);
} else if (Name == "ssse3") {
setSSELevel(Features, SSSE3, Enabled);
} else if (Name == "sse4.2") {
setSSELevel(Features, SSE42, Enabled);
} else if (Name == "sse4.1") {
setSSELevel(Features, SSE41, Enabled);
} else if (Name == "3dnow") {
setMMXLevel(Features, AMD3DNow, Enabled);
} else if (Name == "3dnowa") {
setMMXLevel(Features, AMD3DNowAthlon, Enabled);
} else if (Name == "aes") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
} else if (Name == "pclmul") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
} else if (Name == "avx") {
setSSELevel(Features, AVX, Enabled);
} else if (Name == "avx2") {
setSSELevel(Features, AVX2, Enabled);
} else if (Name == "avx512f") {
setSSELevel(Features, AVX512F, Enabled);
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
} else if (Name == "fma4") {
setXOPLevel(Features, FMA4, Enabled);
} else if (Name == "xop") {
setXOPLevel(Features, XOP, Enabled);
} else if (Name == "sse4a") {
setXOPLevel(Features, SSE4A, Enabled);
} else if (Name == "f16c") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
} else if (Name == "sha") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
}
}
/// handleTargetFeatures - Perform initialization based on the user-configured
/// set of features (passed in as "+name"/"-name" strings, e.g. "+sse4.2" or
/// "-popcnt").
bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
// Remember the maximum enabled SSE level.
for (unsigned i = 0, e = Features.size(); i != e; ++i) {
// Ignore disabled features.
if (Features[i][0] == '-')
continue;
StringRef Feature = StringRef(Features[i]).substr(1);
if (Feature == "aes") {
HasAES = true;
continue;
}
if (Feature == "pclmul") {
HasPCLMUL = true;
continue;
}
if (Feature == "lzcnt") {
HasLZCNT = true;
continue;
}
if (Feature == "rdrnd") {
HasRDRND = true;
continue;
}
if (Feature == "bmi") {
HasBMI = true;
continue;
}
if (Feature == "bmi2") {
HasBMI2 = true;
continue;
}
if (Feature == "popcnt") {
HasPOPCNT = true;
continue;
}
if (Feature == "rtm") {
HasRTM = true;
continue;
}
if (Feature == "prfchw") {
HasPRFCHW = true;
continue;
}
if (Feature == "rdseed") {
HasRDSEED = true;
continue;
}
if (Feature == "tbm") {
HasTBM = true;
continue;
}
if (Feature == "fma") {
HasFMA = true;
continue;
}
if (Feature == "f16c") {
HasF16C = true;
continue;
}
if (Feature == "avx512cd") {
HasAVX512CD = true;
continue;
}
if (Feature == "avx512er") {
HasAVX512ER = true;
continue;
}
if (Feature == "avx512pf") {
HasAVX512PF = true;
continue;
}
if (Feature == "sha") {
HasSHA = true;
continue;
}
if (Feature == "cx16") {
HasCX16 = true;
continue;
}
assert(Features[i][0] == '+' && "Invalid target feature!");
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
.Case("avx512f", AVX512F)
.Case("avx2", AVX2)
.Case("avx", AVX)
.Case("sse4.2", SSE42)
.Case("sse4.1", SSE41)
.Case("ssse3", SSSE3)
.Case("sse3", SSE3)
.Case("sse2", SSE2)
.Case("sse", SSE1)
.Default(NoSSE);
SSELevel = std::max(SSELevel, Level);
MMX3DNowEnum ThreeDNowLevel =
llvm::StringSwitch<MMX3DNowEnum>(Feature)
.Case("3dnowa", AMD3DNowAthlon)
.Case("3dnow", AMD3DNow)
.Case("mmx", MMX)
.Default(NoMMX3DNow);
MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);
XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
.Case("xop", XOP)
.Case("fma4", FMA4)
.Case("sse4a", SSE4A)
.Default(NoXOP);
XOPLevel = std::max(XOPLevel, XLevel);
}
// Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled.
// Can't do this earlier because we need to be able to explicitly enable
// popcnt and still disable sse4.2.
if (!HasPOPCNT && SSELevel >= SSE42 &&
std::find(Features.begin(), Features.end(), "-popcnt") == Features.end()){
HasPOPCNT = true;
Features.push_back("+popcnt");
}
// Enable prfchw if 3DNow! is enabled and prfchw is not explicitly disabled.
if (!HasPRFCHW && MMX3DNowLevel >= AMD3DNow &&
std::find(Features.begin(), Features.end(), "-prfchw") == Features.end()){
HasPRFCHW = true;
Features.push_back("+prfchw");
}
// LLVM doesn't have a separate switch for fpmath, so only accept it if it
// matches the selected sse level.
if (FPMath == FP_SSE && SSELevel < SSE1) {
Diags.Report(diag::err_target_unsupported_fpmath) << "sse";
return false;
} else if (FPMath == FP_387 && SSELevel >= SSE1) {
Diags.Report(diag::err_target_unsupported_fpmath) << "387";
return false;
}
// Don't tell the backend if we're turning off mmx; it will end up disabling
// SSE, which we don't want.
// Additionally, if SSE is enabled and mmx is not explicitly disabled,
// then enable MMX.
std::vector<std::string>::iterator it;
it = std::find(Features.begin(), Features.end(), "-mmx");
if (it != Features.end())
Features.erase(it);
else if (SSELevel > NoSSE)
MMX3DNowLevel = std::max(MMX3DNowLevel, MMX);
return true;
}
/// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro
/// definitions for this particular subtarget.
void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Target identification.
if (getTriple().getArch() == llvm::Triple::x86_64) {
Builder.defineMacro("__amd64__");
Builder.defineMacro("__amd64");
Builder.defineMacro("__x86_64");
Builder.defineMacro("__x86_64__");
} else {
DefineStd(Builder, "i386", Opts);
}
// Subtarget options.
// FIXME: We are hard-coding the tune parameters based on the CPU, but they
// truly should be based on -mtune options.
switch (CPU) {
case CK_Generic:
break;
case CK_i386:
// The rest come from the i386 define above.
Builder.defineMacro("__tune_i386__");
break;
case CK_i486:
case CK_WinChipC6:
case CK_WinChip2:
case CK_C3:
defineCPUMacros(Builder, "i486");
break;
case CK_PentiumMMX:
Builder.defineMacro("__pentium_mmx__");
Builder.defineMacro("__tune_pentium_mmx__");
// Fallthrough
case CK_i586:
case CK_Pentium:
defineCPUMacros(Builder, "i586");
defineCPUMacros(Builder, "pentium");
break;
case CK_Pentium3:
case CK_Pentium3M:
case CK_PentiumM:
Builder.defineMacro("__tune_pentium3__");
// Fallthrough
case CK_Pentium2:
case CK_C3_2:
Builder.defineMacro("__tune_pentium2__");
// Fallthrough
case CK_PentiumPro:
Builder.defineMacro("__tune_i686__");
Builder.defineMacro("__tune_pentiumpro__");
// Fallthrough
case CK_i686:
Builder.defineMacro("__i686");
Builder.defineMacro("__i686__");
// Strangely, __tune_i686__ isn't defined by GCC when CPU == i686.
Builder.defineMacro("__pentiumpro");
Builder.defineMacro("__pentiumpro__");
break;
case CK_Pentium4:
case CK_Pentium4M:
defineCPUMacros(Builder, "pentium4");
break;
case CK_Yonah:
case CK_Prescott:
case CK_Nocona:
defineCPUMacros(Builder, "nocona");
break;
case CK_Core2:
case CK_Penryn:
defineCPUMacros(Builder, "core2");
break;
case CK_Atom:
defineCPUMacros(Builder, "atom");
break;
case CK_Silvermont:
defineCPUMacros(Builder, "slm");
break;
case CK_Corei7:
case CK_Corei7AVX:
case CK_CoreAVXi:
case CK_CoreAVX2:
defineCPUMacros(Builder, "corei7");
break;
case CK_KNL:
defineCPUMacros(Builder, "knl");
break;
case CK_K6_2:
Builder.defineMacro("__k6_2__");
Builder.defineMacro("__tune_k6_2__");
// Fallthrough
case CK_K6_3:
if (CPU != CK_K6_2) { // In case of fallthrough
// FIXME: GCC may be enabling these in cases where some other k6
// architecture is specified but -m3dnow is explicitly provided. The
// exact semantics need to be determined and emulated here.
Builder.defineMacro("__k6_3__");
Builder.defineMacro("__tune_k6_3__");
}
// Fallthrough
case CK_K6:
defineCPUMacros(Builder, "k6");
break;
case CK_Athlon:
case CK_AthlonThunderbird:
case CK_Athlon4:
case CK_AthlonXP:
case CK_AthlonMP:
defineCPUMacros(Builder, "athlon");
if (SSELevel != NoSSE) {
Builder.defineMacro("__athlon_sse__");
Builder.defineMacro("__tune_athlon_sse__");
}
break;
case CK_K8:
case CK_K8SSE3:
case CK_x86_64:
case CK_Opteron:
case CK_OpteronSSE3:
case CK_Athlon64:
case CK_Athlon64SSE3:
case CK_AthlonFX:
defineCPUMacros(Builder, "k8");
break;
case CK_AMDFAM10:
defineCPUMacros(Builder, "amdfam10");
break;
case CK_BTVER1:
defineCPUMacros(Builder, "btver1");
break;
case CK_BTVER2:
defineCPUMacros(Builder, "btver2");
break;
case CK_BDVER1:
defineCPUMacros(Builder, "bdver1");
break;
case CK_BDVER2:
defineCPUMacros(Builder, "bdver2");
break;
case CK_BDVER3:
defineCPUMacros(Builder, "bdver3");
break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
}
// Target properties.
Builder.defineMacro("__LITTLE_ENDIAN__");
Builder.defineMacro("__REGISTER_PREFIX__", "");
// Define __NO_MATH_INLINES on linux/x86 so that we don't get inline
// functions in glibc header files that use FP Stack inline asm which the
// backend can't deal with (PR879).
Builder.defineMacro("__NO_MATH_INLINES");
if (HasAES)
Builder.defineMacro("__AES__");
if (HasPCLMUL)
Builder.defineMacro("__PCLMUL__");
if (HasLZCNT)
Builder.defineMacro("__LZCNT__");
if (HasRDRND)
Builder.defineMacro("__RDRND__");
if (HasBMI)
Builder.defineMacro("__BMI__");
if (HasBMI2)
Builder.defineMacro("__BMI2__");
if (HasPOPCNT)
Builder.defineMacro("__POPCNT__");
if (HasRTM)
Builder.defineMacro("__RTM__");
if (HasPRFCHW)
Builder.defineMacro("__PRFCHW__");
if (HasRDSEED)
Builder.defineMacro("__RDSEED__");
if (HasTBM)
Builder.defineMacro("__TBM__");
switch (XOPLevel) {
case XOP:
Builder.defineMacro("__XOP__");
case FMA4:
Builder.defineMacro("__FMA4__");
case SSE4A:
Builder.defineMacro("__SSE4A__");
case NoXOP:
break;
}
if (HasFMA)
Builder.defineMacro("__FMA__");
if (HasF16C)
Builder.defineMacro("__F16C__");
if (HasAVX512CD)
Builder.defineMacro("__AVX512CD__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
Builder.defineMacro("__AVX512PF__");
if (HasSHA)
Builder.defineMacro("__SHA__");
if (HasCX16)
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
// Each case here deliberately falls through to the next lower level, so a
// given SSE level also defines the macros for every level below it.
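// Worked example (derived from the cases below): with SSELevel == AVX2,
// this defines __AVX2__, __AVX__, __SSE4_2__, __SSE4_1__, __SSSE3__,
// __SSE3__, __SSE2__, __SSE2_MATH__, __SSE__ and __SSE_MATH__.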
switch (SSELevel) {
case AVX512F:
Builder.defineMacro("__AVX512F__");
case AVX2:
Builder.defineMacro("__AVX2__");
case AVX:
Builder.defineMacro("__AVX__");
case SSE42:
Builder.defineMacro("__SSE4_2__");
case SSE41:
Builder.defineMacro("__SSE4_1__");
case SSSE3:
Builder.defineMacro("__SSSE3__");
case SSE3:
Builder.defineMacro("__SSE3__");
case SSE2:
Builder.defineMacro("__SSE2__");
Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied.
case SSE1:
Builder.defineMacro("__SSE__");
Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied.
case NoSSE:
break;
}
if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) {
switch (SSELevel) {
case AVX512F:
case AVX2:
case AVX:
case SSE42:
case SSE41:
case SSSE3:
case SSE3:
case SSE2:
Builder.defineMacro("_M_IX86_FP", Twine(2));
break;
case SSE1:
Builder.defineMacro("_M_IX86_FP", Twine(1));
break;
default:
Builder.defineMacro("_M_IX86_FP", Twine(0));
}
}
// Each case here deliberately falls through to the next lower level.
switch (MMX3DNowLevel) {
case AMD3DNowAthlon:
Builder.defineMacro("__3dNOW_A__");
case AMD3DNow:
Builder.defineMacro("__3dNOW__");
case MMX:
Builder.defineMacro("__MMX__");
case NoMMX3DNow:
break;
}
if (CPU >= CK_i486) {
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
}
if (CPU >= CK_i586)
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
}
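// hasFeature exposes the state computed above to callers; level-based
// queries compare against the recorded maximum, so e.g. hasFeature("sse3")
// is true whenever SSELevel >= SSE3, not only when sse3 was named exactly.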
bool X86TargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("aes", HasAES)
.Case("avx", SSELevel >= AVX)
.Case("avx2", SSELevel >= AVX2)
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("bmi", HasBMI)
.Case("bmi2", HasBMI2)
.Case("cx16", HasCX16)
.Case("f16c", HasF16C)
.Case("fma", HasFMA)
.Case("fma4", XOPLevel >= FMA4)
.Case("tbm", HasTBM)
.Case("lzcnt", HasLZCNT)
.Case("rdrnd", HasRDRND)
.Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
.Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
.Case("mmx", MMX3DNowLevel >= MMX)
.Case("pclmul", HasPCLMUL)
.Case("popcnt", HasPOPCNT)
.Case("rtm", HasRTM)
.Case("prfchw", HasPRFCHW)
.Case("rdseed", HasRDSEED)
.Case("sha", HasSHA)
.Case("sse", SSELevel >= SSE1)
.Case("sse2", SSELevel >= SSE2)
.Case("sse3", SSELevel >= SSE3)
.Case("ssse3", SSELevel >= SSSE3)
.Case("sse4.1", SSELevel >= SSE41)
.Case("sse4.2", SSELevel >= SSE42)
.Case("sse4a", XOPLevel >= SSE4A)
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
.Case("xop", XOPLevel >= XOP)
.Default(false);
}
bool
X86TargetInfo::validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default: return false;
case 'Y': // first letter of a pair:
switch (*(Name+1)) {
default: return false;
case '0': // First SSE register.
case 't': // Any SSE register, when SSE2 is enabled.
case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
case 'm': // any MMX register, when inter-unit moves enabled.
break; // falls through to setAllowsRegister.
}
case 'a': // eax.
case 'b': // ebx.
case 'c': // ecx.
case 'd': // edx.
case 'S': // esi.
case 'D': // edi.
case 'A': // edx:eax.
case 'f': // any x87 floating point stack register.
case 't': // top of floating point stack.
case 'u': // second from top of floating point stack.
case 'q': // Any register accessible as [r]l: a, b, c, and d.
case 'y': // Any MMX register.
case 'x': // Any SSE register.
case 'Q': // Any register accessible as [r]h: a, b, c, and d.
case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
case 'l': // "Index" registers: any general register that can be used as an
// index in a base+index memory access.
Info.setAllowsRegister();
return true;
case 'C': // SSE floating point constant.
case 'G': // x87 floating point constant.
case 'e': // 32-bit signed integer constant for use with zero-extending
// x86_64 instructions.
case 'Z': // 32-bit unsigned integer constant for use with zero-extending
// x86_64 instructions.
return true;
}
}
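// convertConstraint rewrites GCC-style register constraints into LLVM's
// brace-wrapped physical-register syntax. For an (illustrative) statement
// like asm("mov %1, %0" : "=a"(dst) : "d"(src)), 'a' becomes "{ax}" and
// 'd' becomes "{dx}".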
std::string
X86TargetInfo::convertConstraint(const char *&Constraint) const {
switch (*Constraint) {
case 'a': return std::string("{ax}");
case 'b': return std::string("{bx}");
case 'c': return std::string("{cx}");
case 'd': return std::string("{dx}");
case 'S': return std::string("{si}");
case 'D': return std::string("{di}");
case 'p': // address
return std::string("im");
case 't': // top of floating point stack.
return std::string("{st}");
case 'u': // second from top of floating point stack.
return std::string("{st(1)}"); // second from top of floating point stack.
default:
return std::string(1, *Constraint);
}
}
} // end anonymous namespace
namespace {
// X86-32 generic target
class X86_32TargetInfo : public X86TargetInfo {
public:
X86_32TargetInfo(const llvm::Triple &Triple) : X86TargetInfo(Triple) {
DoubleAlign = LongLongAlign = 32;
LongDoubleWidth = 96;
LongDoubleAlign = 32;
SuitableAlign = 128;
DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-"
"a0:0:64-f80:32:32-n8:16:32-S128";
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
RegParmMax = 3;
// Use fpret for all types.
RealTypeUsesObjCFPRet = ((1 << TargetInfo::Float) |
(1 << TargetInfo::Double) |
(1 << TargetInfo::LongDouble));
// x86-32 has atomics up to 8 bytes
// FIXME: Check that we actually have cmpxchg8b before setting
// MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.)
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::CharPtrBuiltinVaList;
}
int getEHDataRegisterNumber(unsigned RegNo) const {
if (RegNo == 0) return 0;
if (RegNo == 1) return 2;
return -1;
}
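// validateInputSize rejects inline-asm inputs that are wider than the
// register a constraint names: on x86-32 the a/b/c/d constraints denote
// 32-bit GPRs, so e.g. a 64-bit operand bound to "a" is rejected.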
virtual bool validateInputSize(StringRef Constraint,
unsigned Size) const {
switch (Constraint[0]) {
default: break;
case 'a':
case 'b':
case 'c':
case 'd':
return Size <= 32;
}
return true;
}
};
} // end anonymous namespace
namespace {
class NetBSDI386TargetInfo : public NetBSDTargetInfo<X86_32TargetInfo> {
public:
NetBSDI386TargetInfo(const llvm::Triple &Triple)
: NetBSDTargetInfo<X86_32TargetInfo>(Triple) {}
virtual unsigned getFloatEvalMethod() const {
unsigned Major, Minor, Micro;
getTriple().getOSVersion(Major, Minor, Micro);
// NetBSD 6.99.26 and newer uses the default rounding mode; Major == 0
// means the triple carried no OS version.
if (Major >= 7 || (Major == 6 && Minor == 99 && Micro >= 26) || Major == 0)
return X86_32TargetInfo::getFloatEvalMethod();
// NetBSD before 6.99.26 defaults to "double" rounding.
return 1;
}
};
} // end anonymous namespace
namespace {
class OpenBSDI386TargetInfo : public OpenBSDTargetInfo<X86_32TargetInfo> {
public:
OpenBSDI386TargetInfo(const llvm::Triple &Triple)
: OpenBSDTargetInfo<X86_32TargetInfo>(Triple) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
}
};
} // end anonymous namespace
namespace {
class BitrigI386TargetInfo : public BitrigTargetInfo<X86_32TargetInfo> {
public:
BitrigI386TargetInfo(const llvm::Triple &Triple)
: BitrigTargetInfo<X86_32TargetInfo>(Triple) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
}
};
} // end anonymous namespace
namespace {
class DarwinI386TargetInfo : public DarwinTargetInfo<X86_32TargetInfo> {
public:
DarwinI386TargetInfo(const llvm::Triple &Triple)
: DarwinTargetInfo<X86_32TargetInfo>(Triple) {
LongDoubleWidth = 128;
LongDoubleAlign = 128;
SuitableAlign = 128;
MaxVectorAlign = 256;
SizeType = UnsignedLong;
IntPtrType = SignedLong;
DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-"
"a0:0:64-f80:128:128-n8:16:32-S128";
HasAlignMac68kSupport = true;
}
};
} // end anonymous namespace
namespace {
// x86-32 Windows target
class WindowsX86_32TargetInfo : public WindowsTargetInfo<X86_32TargetInfo> {
public:
WindowsX86_32TargetInfo(const llvm::Triple &Triple)
: WindowsTargetInfo<X86_32TargetInfo>(Triple) {
TLSSupported = false;
WCharType = UnsignedShort;
DoubleAlign = LongLongAlign = 64;
DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-"
"v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
WindowsTargetInfo<X86_32TargetInfo>::getTargetDefines(Opts, Builder);
}
};
} // end anonymous namespace
namespace {
// x86-32 Windows Visual Studio target
class VisualStudioWindowsX86_32TargetInfo : public WindowsX86_32TargetInfo {
public:
VisualStudioWindowsX86_32TargetInfo(const llvm::Triple &Triple)
: WindowsX86_32TargetInfo(Triple) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
WindowsX86_32TargetInfo::getTargetDefines(Opts, Builder);
WindowsX86_32TargetInfo::getVisualStudioDefines(Opts, Builder);
// The value of the following reflects processor type.
// 300=386, 400=486, 500=Pentium, 600=Blend (default)
// We lost the original triple, so we use the default.
Builder.defineMacro("_M_IX86", "600");
}
};
} // end anonymous namespace
namespace {
// x86-32 MinGW target
class MinGWX86_32TargetInfo : public WindowsX86_32TargetInfo {
public:
MinGWX86_32TargetInfo(const llvm::Triple &Triple)
: WindowsX86_32TargetInfo(Triple) {}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
WindowsX86_32TargetInfo::getTargetDefines(Opts, Builder);
DefineStd(Builder, "WIN32", Opts);
DefineStd(Builder, "WINNT", Opts);
Builder.defineMacro("_X86_");
Builder.defineMacro("__MSVCRT__");
Builder.defineMacro("__MINGW32__");
// mingw32-gcc provides __declspec(a) as alias of __attribute__((a)).
// In contrast, clang-cc1 provides __declspec(a) with -fms-extensions.
if (Opts.MicrosoftExt)
// Provide "as-is" __declspec.
Builder.defineMacro("__declspec", "__declspec");
else
// Provide alias of __attribute__ like mingw32-gcc.
Builder.defineMacro("__declspec(a)", "__attribute__((a))");
}
};
} // end anonymous namespace
namespace {
// x86-32 Cygwin target
class CygwinX86_32TargetInfo : public X86_32TargetInfo {
public:
CygwinX86_32TargetInfo(const llvm::Triple &Triple)
: X86_32TargetInfo(Triple) {
TLSSupported = false;
WCharType = UnsignedShort;
DoubleAlign = LongLongAlign = 64;
DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-"
"a0:0:64-f80:32:32-n8:16:32-S32";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
X86_32TargetInfo::getTargetDefines(Opts, Builder);
Builder.defineMacro("_X86_");
Builder.defineMacro("__CYGWIN__");
Builder.defineMacro("__CYGWIN32__");
DefineStd(Builder, "unix", Opts);
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
};
} // end anonymous namespace
namespace {
// x86-32 Haiku target
class HaikuX86_32TargetInfo : public X86_32TargetInfo {
public:
HaikuX86_32TargetInfo(const llvm::Triple &Triple) : X86_32TargetInfo(Triple) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
ProcessIDType = SignedLong;
this->UserLabelPrefix = "";
this->TLSSupported = false;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
X86_32TargetInfo::getTargetDefines(Opts, Builder);
Builder.defineMacro("__INTEL__");
Builder.defineMacro("__HAIKU__");
}
};
} // end anonymous namespace
// RTEMS Target
template<typename Target>
class RTEMSTargetInfo : public OSTargetInfo<Target> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
// RTEMS defines; list based off of gcc output
Builder.defineMacro("__rtems__");
Builder.defineMacro("__ELF__");
}
public:
RTEMSTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
this->UserLabelPrefix = "";
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
// this->MCountName = ".mcount";
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
// this->MCountName = "_mcount";
break;
case llvm::Triple::arm:
// this->MCountName = "__mcount";
break;
}
}
};
namespace {
// x86-32 RTEMS target
class RTEMSX86_32TargetInfo : public X86_32TargetInfo {
public:
RTEMSX86_32TargetInfo(const llvm::Triple &Triple) : X86_32TargetInfo(Triple) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
this->UserLabelPrefix = "";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
X86_32TargetInfo::getTargetDefines(Opts, Builder);
Builder.defineMacro("__INTEL__");
Builder.defineMacro("__rtems__");
}
};
} // end anonymous namespace
namespace {
// x86-64 generic target
class X86_64TargetInfo : public X86TargetInfo {
public:
X86_64TargetInfo(const llvm::Triple &Triple) : X86TargetInfo(Triple) {
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
LongDoubleWidth = 128;
LongDoubleAlign = 128;
LargeArrayMinWidth = 128;
LargeArrayAlign = 128;
SuitableAlign = 128;
IntMaxType = SignedLong;
UIntMaxType = UnsignedLong;
Int64Type = SignedLong;
RegParmMax = 6;
DescriptionString = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-"
"a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128";
// Use fpret only for long double.
RealTypeUsesObjCFPRet = (1 << TargetInfo::LongDouble);
// Use fp2ret for _Complex long double.
ComplexLongDoubleUsesFP2Ret = true;
// x86-64 has atomics up to 16 bytes.
// FIXME: Once the backend is fixed, increase MaxAtomicInlineWidth to 128
// on CPUs with cmpxchg16b
MaxAtomicPromoteWidth = 128;
MaxAtomicInlineWidth = 64;
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::X86_64ABIBuiltinVaList;
}
int getEHDataRegisterNumber(unsigned RegNo) const {
if (RegNo == 0) return 0;
if (RegNo == 1) return 1;
return -1;
}
virtual CallingConvCheckResult checkCallingConvention(CallingConv CC) const {
return (CC == CC_C ||
CC == CC_IntelOclBicc ||
CC == CC_X86_64Win64) ? CCCR_OK : CCCR_Warning;
}
virtual CallingConv getDefaultCallingConv(CallingConvMethodType MT) const {
return CC_C;
}
};
} // end anonymous namespace
namespace {
// x86-64 Windows target
class WindowsX86_64TargetInfo : public WindowsTargetInfo<X86_64TargetInfo> {
public:
WindowsX86_64TargetInfo(const llvm::Triple &Triple)
: WindowsTargetInfo<X86_64TargetInfo>(Triple) {
TLSSupported = false;
WCharType = UnsignedShort;
LongWidth = LongAlign = 32;
DoubleAlign = LongLongAlign = 64;
IntMaxType = SignedLongLong;
UIntMaxType = UnsignedLongLong;
Int64Type = SignedLongLong;
SizeType = UnsignedLongLong;
PtrDiffType = SignedLongLong;
IntPtrType = SignedLongLong;
this->UserLabelPrefix = "";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
WindowsTargetInfo<X86_64TargetInfo>::getTargetDefines(Opts, Builder);
Builder.defineMacro("_WIN64");
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::CharPtrBuiltinVaList;
}
virtual CallingConvCheckResult checkCallingConvention(CallingConv CC) const {
return (CC == CC_C ||
CC == CC_IntelOclBicc ||
CC == CC_X86_64SysV) ? CCCR_OK : CCCR_Warning;
}
};
} // end anonymous namespace
namespace {
// x86-64 Windows Visual Studio target
class VisualStudioWindowsX86_64TargetInfo : public WindowsX86_64TargetInfo {
public:
VisualStudioWindowsX86_64TargetInfo(const llvm::Triple &Triple)
: WindowsX86_64TargetInfo(Triple) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
WindowsX86_64TargetInfo::getTargetDefines(Opts, Builder);
WindowsX86_64TargetInfo::getVisualStudioDefines(Opts, Builder);
Builder.defineMacro("_M_X64");
Builder.defineMacro("_M_AMD64");
}
};
} // end anonymous namespace
namespace {
// x86-64 MinGW target
class MinGWX86_64TargetInfo : public WindowsX86_64TargetInfo {
public:
MinGWX86_64TargetInfo(const llvm::Triple &Triple)
: WindowsX86_64TargetInfo(Triple) {}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
WindowsX86_64TargetInfo::getTargetDefines(Opts, Builder);
DefineStd(Builder, "WIN64", Opts);
Builder.defineMacro("__MSVCRT__");
Builder.defineMacro("__MINGW32__");
Builder.defineMacro("__MINGW64__");
// mingw32-gcc provides __declspec(a) as alias of __attribute__((a)).
// In contrast, clang-cc1 provides __declspec(a) with -fms-extensions.
if (Opts.MicrosoftExt)
// Provide "as-is" __declspec.
Builder.defineMacro("__declspec", "__declspec");
else
// Provide alias of __attribute__ like mingw32-gcc.
Builder.defineMacro("__declspec(a)", "__attribute__((a))");
}
};
} // end anonymous namespace
namespace {
class DarwinX86_64TargetInfo : public DarwinTargetInfo<X86_64TargetInfo> {
public:
DarwinX86_64TargetInfo(const llvm::Triple &Triple)
: DarwinTargetInfo<X86_64TargetInfo>(Triple) {
Int64Type = SignedLongLong;
MaxVectorAlign = 256;
}
};
} // end anonymous namespace
namespace {
class OpenBSDX86_64TargetInfo : public OpenBSDTargetInfo<X86_64TargetInfo> {
public:
OpenBSDX86_64TargetInfo(const llvm::Triple &Triple)
: OpenBSDTargetInfo<X86_64TargetInfo>(Triple) {
IntMaxType = SignedLongLong;
UIntMaxType = UnsignedLongLong;
Int64Type = SignedLongLong;
}
};
} // end anonymous namespace
namespace {
class BitrigX86_64TargetInfo : public BitrigTargetInfo<X86_64TargetInfo> {
public:
BitrigX86_64TargetInfo(const llvm::Triple &Triple)
: BitrigTargetInfo<X86_64TargetInfo>(Triple) {
IntMaxType = SignedLongLong;
UIntMaxType = UnsignedLongLong;
Int64Type = SignedLongLong;
}
};
} // end anonymous namespace
namespace {
class AArch64TargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const TargetInfo::GCCRegAlias GCCRegAliases[];
enum FPUModeEnum {
FPUMode,
NeonMode
};
unsigned FPU;
unsigned Crypto;
static const Builtin::Info BuiltinInfo[];
public:
AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
LongWidth = LongAlign = 64;
LongDoubleWidth = LongDoubleAlign = 128;
PointerWidth = PointerAlign = 64;
SuitableAlign = 128;
DescriptionString = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-i128:128:128-f32:32:32-f64:64:64-"
"f128:128:128-n32:64-S128";
WCharType = UnsignedInt;
LongDoubleFormat = &llvm::APFloat::IEEEquad;
// The AArch64 backend supports 64-bit atomic operations at the moment. In
// principle 128-bit would be possible if register pairs were used.
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
TheCXXABI.set(TargetCXXABI::GenericAArch64);
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// GCC defines these currently.
Builder.defineMacro("__aarch64__");
Builder.defineMacro("__AARCH64EL__");
// ACLE predefines. Many can only have one possible value on v8 AArch64.
Builder.defineMacro("__ARM_ACLE", "200");
Builder.defineMacro("__ARM_ARCH", "8");
Builder.defineMacro("__ARM_ARCH_PROFILE", "'A'");
Builder.defineMacro("__ARM_64BIT_STATE");
Builder.defineMacro("__ARM_PCS_AAPCS64");
Builder.defineMacro("__ARM_ARCH_ISA_A64");
Builder.defineMacro("__ARM_FEATURE_UNALIGNED");
Builder.defineMacro("__ARM_FEATURE_CLZ");
Builder.defineMacro("__ARM_FEATURE_FMA");
Builder.defineMacro("__ARM_FEATURE_DIV");
Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4");
// 0xe implies support for half, single and double precision operations.
Builder.defineMacro("__ARM_FP", "0xe");
// PCS specifies this for SysV variants, which is all we support. Other ABIs
// may choose __ARM_FP16_FORMAT_ALTERNATIVE.
Builder.defineMacro("__ARM_FP16_FORMAT_IEEE");
if (Opts.FastMath || Opts.FiniteMathOnly)
Builder.defineMacro("__ARM_FP_FAST");
if ((Opts.C99 || Opts.C11) && !Opts.Freestanding)
Builder.defineMacro("__ARM_FP_FENV_ROUNDING");
Builder.defineMacro("__ARM_SIZEOF_WCHAR_T",
Opts.ShortWChar ? "2" : "4");
Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM",
Opts.ShortEnums ? "1" : "4");
if (BigEndian)
Builder.defineMacro("__AARCH_BIG_ENDIAN");
if (FPU == NeonMode) {
Builder.defineMacro("__ARM_NEON");
// 64-bit NEON supports half, single and double precision operations.
Builder.defineMacro("__ARM_NEON_FP", "7");
}
if (Crypto) {
Builder.defineMacro("__ARM_FEATURE_CRYPTO");
}
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool hasFeature(StringRef Feature) const {
return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode);
}
virtual bool setCPU(const std::string &Name) {
return llvm::StringSwitch<bool>(Name)
.Case("generic", true)
.Cases("cortex-a53", "cortex-a57", true)
.Default(false);
}
virtual bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
FPU = FPUMode;
Crypto = 0;
for (unsigned i = 0, e = Features.size(); i != e; ++i) {
if (Features[i] == "+neon")
FPU = NeonMode;
if (Features[i] == "+crypto")
Crypto = 1;
}
return true;
}
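// E.g. Features == {"+neon", "+crypto"} leaves FPU == NeonMode and
// Crypto == 1, which getTargetDefines above turns into __ARM_NEON,
// __ARM_NEON_FP and __ARM_FEATURE_CRYPTO.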
virtual void getGCCRegNames(const char *const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
virtual bool isCLZForZeroUndef() const { return false; }
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default: return false;
case 'w': // An FP/SIMD vector register
Info.setAllowsRegister();
return true;
case 'I': // Constant that can be used with an ADD instruction
case 'J': // Constant that can be used with a SUB instruction
case 'K': // Constant that can be used with a 32-bit logical instruction
case 'L': // Constant that can be used with a 64-bit logical instruction
case 'M': // Constant that can be used as a 32-bit MOV immediate
case 'N': // Constant that can be used as a 64-bit MOV immediate
case 'Y': // Floating point constant zero
case 'Z': // Integer constant zero
return true;
case 'Q': // A memory reference with base register and no offset
Info.setAllowsMemory();
return true;
case 'S': // A symbolic address
Info.setAllowsRegister();
return true;
case 'U':
// Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be
// Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
// Usa: An absolute symbolic address
// Ush: The high part (bits 32:12) of a pc-relative symbolic address
llvm_unreachable("FIXME: Unimplemented support for bizarre constraints");
}
}
virtual const char *getClobbers() const {
// There are no AArch64 clobbers shared by all asm statements.
return "";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::AArch64ABIBuiltinVaList;
}
};
const char * const AArch64TargetInfo::GCCRegNames[] = {
"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", "wzr",
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
"x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", "xzr",
"b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
"b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
"b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
"b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31",
"h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
"h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
"h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
"h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31",
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
"q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
"q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
"q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31"
};
void AArch64TargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = {
{ { "x16" }, "ip0"},
{ { "x17" }, "ip1"},
{ { "x29" }, "fp" },
{ { "x30" }, "lr" }
};
void AArch64TargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsAArch64.def"
};
} // end anonymous namespace
namespace {
class ARMTargetInfo : public TargetInfo {
// Possible FPU choices.
enum FPUMode {
VFP2FPU = (1 << 0),
VFP3FPU = (1 << 1),
VFP4FPU = (1 << 2),
NeonFPU = (1 << 3),
FPARMV8 = (1 << 4)
};
// Possible HWDiv features.
enum HWDivMode {
HWDivThumb = (1 << 0),
HWDivARM = (1 << 1)
};
static bool FPUModeIsVFP(FPUMode Mode) {
return Mode & (VFP2FPU | VFP3FPU | VFP4FPU | NeonFPU | FPARMV8);
}
static const TargetInfo::GCCRegAlias GCCRegAliases[];
static const char * const GCCRegNames[];
std::string ABI, CPU;
enum {
FP_Default,
FP_VFP,
FP_Neon
} FPMath;
unsigned FPU : 5;
unsigned IsAAPCS : 1;
unsigned IsThumb : 1;
unsigned HWDiv : 2;
// Initialized via features.
unsigned SoftFloat : 1;
unsigned SoftFloatABI : 1;
unsigned CRC : 1;
static const Builtin::Info BuiltinInfo[];
static bool shouldUseInlineAtomic(const llvm::Triple &T) {
// On Linux, binaries targeting old CPUs call functions in libgcc to
// perform atomic operations. The implementation in libgcc then calls into
// the kernel, which on armv6 and newer uses ldrex and strex. The net result
// is that if we assume the kernel is at least as recent as the hardware,
// it is safe to use atomic instructions on armv6 and newer.
if (!T.isOSLinux() &&
T.getOS() != llvm::Triple::FreeBSD &&
T.getOS() != llvm::Triple::NetBSD &&
T.getOS() != llvm::Triple::Bitrig)
return false;
StringRef ArchName = T.getArchName();
if (T.getArch() == llvm::Triple::arm) {
if (!ArchName.startswith("armv"))
return false;
StringRef VersionStr = ArchName.substr(4);
unsigned Version;
if (VersionStr.getAsInteger(10, Version))
return false;
return Version >= 6;
}
assert(T.getArch() == llvm::Triple::thumb);
if (!ArchName.startswith("thumbv"))
return false;
StringRef VersionStr = ArchName.substr(6);
unsigned Version;
if (VersionStr.getAsInteger(10, Version))
return false;
return Version >= 7;
}
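// Example, derived from the parsing above: a triple such as
// "armv7-unknown-linux-gnueabi" has arch name "armv7", so Version == 7 and
// inline atomics are used; "armv5" fails the Version >= 6 test and the
// wider atomic operations stay out of line.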
public:
ARMTargetInfo(const llvm::Triple &Triple)
: TargetInfo(Triple), ABI("aapcs-linux"), CPU("arm1136j-s"),
FPMath(FP_Default), IsAAPCS(true) {
BigEndian = false;
switch (getTriple().getOS()) {
case llvm::Triple::NetBSD:
SizeType = UnsignedLong;
PtrDiffType = SignedLong;
WCharType = SignedInt;
break;
default:
// AAPCS 7.1.1, ARM-Linux ABI 2.4: type of wchar_t is unsigned int.
WCharType = UnsignedInt;
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
break;
}
// {} in inline assembly are neon specifiers, not assembly variant
// specifiers.
NoAsmVariants = true;
// FIXME: Should we just treat this as a feature?
IsThumb = getTriple().getArchName().startswith("thumb");
if (IsThumb) {
// Thumb1 add sp, #imm requires the immediate value be a multiple of 4,
// so set the preferred alignment for small types to 32.
DescriptionString = ("e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-"
"v64:64:64-v128:64:128-a0:0:32-n32-S64");
} else {
DescriptionString = ("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-"
"v64:64:64-v128:64:128-a0:0:64-n32-S64");
}
// ARM targets default to using the ARM C++ ABI.
TheCXXABI.set(TargetCXXABI::GenericARM);
// ARM has atomics up to 8 bytes
MaxAtomicPromoteWidth = 64;
if (shouldUseInlineAtomic(getTriple()))
MaxAtomicInlineWidth = 64;
// Force the alignment of members that follow zero-length bitfields. If the
// alignment of a zero-length bitfield is greater than that of the member
// `bar' that follows it, `bar' will be aligned as the type of the
// zero-length bitfield.
UseZeroLengthBitfieldAlignment = true;
}
virtual const char *getABI() const { return ABI.c_str(); }
virtual bool setABI(const std::string &Name) {
ABI = Name;
// The defaults (above) are for AAPCS, check if we need to change them.
//
// FIXME: We need support for -meabi... we could just mangle it into the
// name.
if (Name == "apcs-gnu") {
DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 32;
// size_t is unsigned int on FreeBSD.
if (getTriple().getOS() != llvm::Triple::FreeBSD)
SizeType = UnsignedLong;
// Revert to using SignedInt on apcs-gnu to comply with existing behaviour.
WCharType = SignedInt;
// Do not respect the alignment of bit-field types when laying out
// structures. This corresponds to PCC_BITFIELD_TYPE_MATTERS in gcc.
UseBitFieldTypeAlignment = false;
/// gcc forces the alignment to 4 bytes, regardless of the type of the
/// zero length bitfield. This corresponds to EMPTY_FIELD_BOUNDARY in
/// gcc.
ZeroLengthBitfieldBoundary = 32;
IsAAPCS = false;
if (IsThumb) {
// Thumb1 add sp, #imm requires the immediate value be a multiple of 4,
// so set the preferred alignment for small types to 32.
DescriptionString = ("e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-"
"i64:32:64-f32:32:32-f64:32:64-"
"v64:32:64-v128:32:128-a0:0:32-n32-S32");
} else {
DescriptionString = ("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:32:64-f32:32:32-f64:32:64-"
"v64:32:64-v128:32:128-a0:0:32-n32-S32");
}
// FIXME: Override "preferred align" for double and long long.
} else if (Name == "aapcs" || Name == "aapcs-vfp") {
// size_t is unsigned long on Darwin.
if (getTriple().isOSDarwin())
SizeType = UnsignedLong;
IsAAPCS = true;
// FIXME: Enumerated types are variable width in straight AAPCS.
} else if (Name == "aapcs-linux") {
IsAAPCS = true;
} else
return false;
return true;
}
void getDefaultFeatures(llvm::StringMap<bool> &Features) const {
StringRef ArchName = getTriple().getArchName();
if (CPU == "arm1136jf-s" || CPU == "arm1176jzf-s" || CPU == "mpcore")
Features["vfp2"] = true;
else if (CPU == "cortex-a8" || CPU == "cortex-a9" ||
CPU == "cortex-a9-mp") {
Features["vfp3"] = true;
Features["neon"] = true;
}
else if (CPU == "cortex-a5") {
Features["vfp4"] = true;
Features["neon"] = true;
} else if (CPU == "swift" || CPU == "cortex-a7" || CPU == "cortex-a15") {
Features["vfp4"] = true;
Features["neon"] = true;
Features["hwdiv"] = true;
Features["hwdiv-arm"] = true;
} else if (CPU == "cortex-a53" || CPU == "cortex-a57") {
Features["fp-armv8"] = true;
Features["neon"] = true;
Features["hwdiv"] = true;
Features["hwdiv-arm"] = true;
Features["crc"] = true;
} else if (CPU == "cortex-r5" || CPU == "cortex-m3" ||
CPU == "cortex-m4" ||
// Enable the hwdiv extension for all v8a AArch32 cores by
// default.
ArchName == "armv8a" || ArchName == "armv8" ||
ArchName == "thumbv8a" || ArchName == "thumbv8") {
Features["hwdiv"] = true;
Features["hwdiv-arm"] = true;
}
}
virtual bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
FPU = 0;
CRC = 0;
SoftFloat = SoftFloatABI = false;
HWDiv = 0;
for (unsigned i = 0, e = Features.size(); i != e; ++i) {
if (Features[i] == "+soft-float")
SoftFloat = true;
else if (Features[i] == "+soft-float-abi")
SoftFloatABI = true;
else if (Features[i] == "+vfp2")
FPU |= VFP2FPU;
else if (Features[i] == "+vfp3")
FPU |= VFP3FPU;
else if (Features[i] == "+vfp4")
FPU |= VFP4FPU;
else if (Features[i] == "+fp-armv8")
FPU |= FPARMV8;
else if (Features[i] == "+neon")
FPU |= NeonFPU;
else if (Features[i] == "+hwdiv")
HWDiv |= HWDivThumb;
else if (Features[i] == "+hwdiv-arm")
HWDiv |= HWDivARM;
else if (Features[i] == "+crc")
CRC = 1;
}
if (!(FPU & NeonFPU) && FPMath == FP_Neon) {
Diags.Report(diag::err_target_unsupported_fpmath) << "neon";
return false;
}
if (FPMath == FP_Neon)
Features.push_back("+neonfp");
else if (FPMath == FP_VFP)
Features.push_back("-neonfp");
// Remove front-end specific options which the backend handles differently.
std::vector<std::string>::iterator it;
it = std::find(Features.begin(), Features.end(), "+soft-float");
if (it != Features.end())
Features.erase(it);
it = std::find(Features.begin(), Features.end(), "+soft-float-abi");
if (it != Features.end())
Features.erase(it);
return true;
}
virtual bool hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("arm", true)
.Case("softfloat", SoftFloat)
.Case("thumb", IsThumb)
.Case("neon", (FPU & NeonFPU) && !SoftFloat)
.Case("hwdiv", HWDiv & HWDivThumb)
.Case("hwdiv-arm", HWDiv & HWDivARM)
.Default(false);
}
// FIXME: Should we actually have some table instead of these switches?
static const char *getCPUDefineSuffix(StringRef Name) {
return llvm::StringSwitch<const char*>(Name)
.Cases("arm8", "arm810", "4")
.Cases("strongarm", "strongarm110", "strongarm1100", "strongarm1110", "4")
.Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "arm720t", "arm9", "4T")
.Cases("arm9tdmi", "arm920", "arm920t", "arm922t", "arm940t", "4T")
.Case("ep9312", "4T")
.Cases("arm10tdmi", "arm1020t", "5T")
.Cases("arm9e", "arm946e-s", "arm966e-s", "arm968e-s", "5TE")
.Case("arm926ej-s", "5TEJ")
.Cases("arm10e", "arm1020e", "arm1022e", "5TE")
.Cases("xscale", "iwmmxt", "5TE")
.Case("arm1136j-s", "6J")
.Cases("arm1176jz-s", "arm1176jzf-s", "6ZK")
.Cases("arm1136jf-s", "mpcorenovfp", "mpcore", "6K")
.Cases("arm1156t2-s", "arm1156t2f-s", "6T2")
.Cases("cortex-a5", "cortex-a7", "cortex-a8", "7A")
.Cases("cortex-a9", "cortex-a12", "cortex-a15", "7A")
.Cases("cortex-r4", "cortex-r5", "7R")
.Case("cortex-a9-mp", "7F")
.Case("swift", "7S")
.Cases("cortex-m3", "cortex-m4", "7M")
.Case("cortex-m0", "6M")
.Cases("cortex-a53", "cortex-a57", "8A")
.Default(0);
}
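// E.g. getCPUDefineSuffix("cortex-a15") == "7A"; getTargetDefines below
// turns that into __ARM_ARCH_7A__ and takes the leading digit for
// __ARM_ARCH.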
static const char *getCPUProfile(StringRef Name) {
return llvm::StringSwitch<const char*>(Name)
.Cases("cortex-a5", "cortex-a7", "cortex-a8", "A")
.Cases("cortex-a9", "cortex-a12", "cortex-a15", "A")
.Cases("cortex-a53", "cortex-a57", "A")
.Cases("cortex-m3", "cortex-m4", "cortex-m0", "M")
.Cases("cortex-r4", "cortex-r5", "R")
.Default("");
}
virtual bool setCPU(const std::string &Name) {
if (!getCPUDefineSuffix(Name))
return false;
CPU = Name;
return true;
}
virtual bool setFPMath(StringRef Name);
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Target identification.
Builder.defineMacro("__arm");
Builder.defineMacro("__arm__");
// Target properties.
Builder.defineMacro("__ARMEL__");
Builder.defineMacro("__LITTLE_ENDIAN__");
Builder.defineMacro("__REGISTER_PREFIX__", "");
StringRef CPUArch = getCPUDefineSuffix(CPU);
unsigned int CPUArchVer;
if (CPUArch.substr(0, 1).getAsInteger<unsigned int>(10, CPUArchVer)) {
llvm_unreachable("Invalid char for architecture version number");
}
Builder.defineMacro("__ARM_ARCH_" + CPUArch + "__");
Builder.defineMacro("__ARM_ARCH", CPUArch.substr(0, 1));
StringRef CPUProfile = getCPUProfile(CPU);
if (!CPUProfile.empty())
Builder.defineMacro("__ARM_ARCH_PROFILE", CPUProfile);
// Subtarget options.
// FIXME: It's more complicated than this and we don't really support
// interworking.
if (5 <= CPUArchVer && CPUArchVer <= 7)
Builder.defineMacro("__THUMB_INTERWORK__");
if (ABI == "aapcs" || ABI == "aapcs-linux" || ABI == "aapcs-vfp") {
// Embedded targets on Darwin follow AAPCS, but not EABI.
if (!getTriple().isOSDarwin())
Builder.defineMacro("__ARM_EABI__");
Builder.defineMacro("__ARM_PCS", "1");
if ((!SoftFloat && !SoftFloatABI) || ABI == "aapcs-vfp")
Builder.defineMacro("__ARM_PCS_VFP", "1");
}
if (SoftFloat)
Builder.defineMacro("__SOFTFP__");
if (CPU == "xscale")
Builder.defineMacro("__XSCALE__");
if (IsThumb) {
Builder.defineMacro("__THUMBEL__");
Builder.defineMacro("__thumb__");
if (CPUArch == "6T2" || CPUArchVer == 7)
Builder.defineMacro("__thumb2__");
}
if (((HWDiv & HWDivThumb) && IsThumb) || ((HWDiv & HWDivARM) && !IsThumb))
Builder.defineMacro("__ARM_ARCH_EXT_IDIV__", "1");
// Note, this is always on in gcc, even though it doesn't make sense.
Builder.defineMacro("__APCS_32__");
if (FPUModeIsVFP((FPUMode) FPU)) {
Builder.defineMacro("__VFP_FP__");
if (FPU & VFP2FPU)
Builder.defineMacro("__ARM_VFPV2__");
if (FPU & VFP3FPU)
Builder.defineMacro("__ARM_VFPV3__");
if (FPU & VFP4FPU)
Builder.defineMacro("__ARM_VFPV4__");
}
// This only gets set when Neon instructions are actually available, unlike
// the VFP define, hence the soft float and arch check. This is subtly
// different from gcc; we follow the intent, which was that it should be set
// when Neon instructions are actually available.
if ((FPU & NeonFPU) && !SoftFloat && CPUArchVer >= 7)
Builder.defineMacro("__ARM_NEON__");
if (CRC)
Builder.defineMacro("__ARM_FEATURE_CRC32");
if (CPUArchVer >= 6 && CPUArch != "6M") {
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
}
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::ARM::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool isCLZForZeroUndef() const { return false; }
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return IsAAPCS ? AAPCSABIBuiltinVaList : TargetInfo::VoidPtrBuiltinVaList;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default: break;
case 'l': // r0-r7
case 'h': // r8-r15
case 'w': // VFP Floating point register single precision
case 'P': // VFP Floating point register double precision
Info.setAllowsRegister();
return true;
case 'Q': // A memory address that is a single base register.
Info.setAllowsMemory();
return true;
case 'U': // a memory reference...
switch (Name[1]) {
case 'q': // ...ARMV4 ldrsb
case 'v': // ...VFP load/store (reg+constant offset)
case 'y': // ...iWMMXt load/store
case 't': // address valid for load/store opaque types wider
// than 128-bits
case 'n': // valid address for Neon doubleword vector load/store
case 'm': // valid address for Neon element and structure load/store
case 's': // valid address for non-offset loads/stores of quad-word
// values in four ARM registers
Info.setAllowsMemory();
Name++;
return true;
}
}
return false;
}
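// E.g. the two-letter memory constraint "Uv" is rewritten to "^Uv" so later
// parsing can tell it apart from single-letter constraints, while 'p' (an
// address) degrades to a plain register constraint "r".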
virtual std::string convertConstraint(const char *&Constraint) const {
std::string R;
switch (*Constraint) {
case 'U': // Two-character constraint; add "^" hint for later parsing.
R = std::string("^") + std::string(Constraint, 2);
Constraint++;
break;
case 'p': // 'p' should be translated to 'r' by default.
R = std::string("r");
break;
default:
return std::string(1, *Constraint);
}
return R;
}
virtual bool validateConstraintModifier(StringRef Constraint,
const char Modifier,
unsigned Size) const {
bool isOutput = (Constraint[0] == '=');
bool isInOut = (Constraint[0] == '+');
// Strip off constraint modifiers.
while (Constraint[0] == '=' ||
Constraint[0] == '+' ||
Constraint[0] == '&')
Constraint = Constraint.substr(1);
switch (Constraint[0]) {
default: break;
case 'r': {
switch (Modifier) {
default:
return (isInOut || isOutput || Size <= 64);
case 'q':
// A register of size 32 cannot fit a vector type.
return false;
}
}
}
return true;
}
virtual const char *getClobbers() const {
// FIXME: Is this really right?
return "";
}
virtual CallingConvCheckResult checkCallingConvention(CallingConv CC) const {
return (CC == CC_AAPCS || CC == CC_AAPCS_VFP) ? CCCR_OK : CCCR_Warning;
}
virtual int getEHDataRegisterNumber(unsigned RegNo) const {
if (RegNo == 0) return 0;
if (RegNo == 1) return 1;
return -1;
}
};
bool ARMTargetInfo::setFPMath(StringRef Name) {
if (Name == "neon") {
FPMath = FP_Neon;
return true;
} else if (Name == "vfp" || Name == "vfp2" || Name == "vfp3" ||
Name == "vfp4") {
FPMath = FP_VFP;
return true;
}
return false;
}
const char * const ARMTargetInfo::GCCRegNames[] = {
// Integer registers
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc",
// Float registers
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
// Double registers
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
// Quad registers
"q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
};
void ARMTargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
const TargetInfo::GCCRegAlias ARMTargetInfo::GCCRegAliases[] = {
{ { "a1" }, "r0" },
{ { "a2" }, "r1" },
{ { "a3" }, "r2" },
{ { "a4" }, "r3" },
{ { "v1" }, "r4" },
{ { "v2" }, "r5" },
{ { "v3" }, "r6" },
{ { "v4" }, "r7" },
{ { "v5" }, "r8" },
{ { "v6", "rfp" }, "r9" },
{ { "sl" }, "r10" },
{ { "fp" }, "r11" },
{ { "ip" }, "r12" },
{ { "r13" }, "sp" },
{ { "r14" }, "lr" },
{ { "r15" }, "pc" },
// The S, D and Q registers overlap, but aren't really aliases; we
// don't want to substitute one of these for a different-sized one.
};
void ARMTargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
const Builtin::Info ARMTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsARM.def"
};
} // end anonymous namespace.
namespace {
class DarwinARMTargetInfo :
public DarwinTargetInfo<ARMTargetInfo> {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const {
getDarwinDefines(Builder, Opts, Triple, PlatformName, PlatformMinVersion);
}
public:
DarwinARMTargetInfo(const llvm::Triple &Triple)
: DarwinTargetInfo<ARMTargetInfo>(Triple) {
HasAlignMac68kSupport = true;
// iOS always has 64-bit atomic instructions.
// FIXME: This should be based off of the target features in ARMTargetInfo.
MaxAtomicInlineWidth = 64;
// Darwin on iOS uses a variant of the ARM C++ ABI.
TheCXXABI.set(TargetCXXABI::iOS);
}
};
} // end anonymous namespace.
namespace {
// Hexagon abstract base class
class HexagonTargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
static const char * const GCCRegNames[];
static const TargetInfo::GCCRegAlias GCCRegAliases[];
std::string CPU;
public:
HexagonTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
DescriptionString = ("e-p:32:32:32-"
"i64:64:64-i32:32:32-i16:16:16-i1:32:32-"
"f64:64:64-f32:32:32-a0:0-n32");
// {} in inline assembly are packet specifiers, not assembly variant
// specifiers.
NoAsmVariants = true;
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::Hexagon::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
return true;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const;
virtual bool hasFeature(StringRef Feature) const {
return Feature == "hexagon";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::CharPtrBuiltinVaList;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
virtual const char *getClobbers() const {
return "";
}
static const char *getHexagonCPUSuffix(StringRef Name) {
return llvm::StringSwitch<const char*>(Name)
.Case("hexagonv4", "4")
.Case("hexagonv5", "5")
.Default(0);
}
virtual bool setCPU(const std::string &Name) {
if (!getHexagonCPUSuffix(Name))
return false;
CPU = Name;
return true;
}
};
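// Illustrative sketch (hypothetical checks): getHexagonCPUSuffix() doubles
// as the validator in setCPU(); a null return rejects the name.
#if 0
assert(StringRef(HexagonTargetInfo::getHexagonCPUSuffix("hexagonv5")) == "5");
assert(!HexagonTargetInfo::getHexagonCPUSuffix("hexagonv9"));
assert(!HexagonTargetInfo::getHexagonCPUSuffix("v5"));
#endif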
void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("qdsp6");
Builder.defineMacro("__qdsp6", "1");
Builder.defineMacro("__qdsp6__", "1");
Builder.defineMacro("hexagon");
Builder.defineMacro("__hexagon", "1");
Builder.defineMacro("__hexagon__", "1");
if (CPU == "hexagonv1") {
Builder.defineMacro("__HEXAGON_V1__");
Builder.defineMacro("__HEXAGON_ARCH__", "1");
if (Opts.HexagonQdsp6Compat) {
Builder.defineMacro("__QDSP6_V1__");
Builder.defineMacro("__QDSP6_ARCH__", "1");
}
} else if (CPU == "hexagonv2") {
Builder.defineMacro("__HEXAGON_V2__");
Builder.defineMacro("__HEXAGON_ARCH__", "2");
if (Opts.HexagonQdsp6Compat) {
Builder.defineMacro("__QDSP6_V2__");
Builder.defineMacro("__QDSP6_ARCH__", "2");
}
} else if (CPU == "hexagonv3") {
Builder.defineMacro("__HEXAGON_V3__");
Builder.defineMacro("__HEXAGON_ARCH__", "3");
if (Opts.HexagonQdsp6Compat) {
Builder.defineMacro("__QDSP6_V3__");
Builder.defineMacro("__QDSP6_ARCH__", "3");
}
} else if (CPU == "hexagonv4") {
Builder.defineMacro("__HEXAGON_V4__");
Builder.defineMacro("__HEXAGON_ARCH__", "4");
if (Opts.HexagonQdsp6Compat) {
Builder.defineMacro("__QDSP6_V4__");
Builder.defineMacro("__QDSP6_ARCH__", "4");
}
} else if (CPU == "hexagonv5") {
Builder.defineMacro("__HEXAGON_V5__");
Builder.defineMacro("__HEXAGON_ARCH__", "5");
if (Opts.HexagonQdsp6Compat) {
Builder.defineMacro("__QDSP6_V5__");
Builder.defineMacro("__QDSP6_ARCH__", "5");
}
}
}
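// Illustrative note (hypothetical user code): the macros defined above allow
// compile-time dispatch on the architecture revision, e.g.
//
//   #if defined(__HEXAGON_ARCH__) && __HEXAGON_ARCH__ >= 4
//   /* use V4-only instructions */
//   #endif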
const char * const HexagonTargetInfo::GCCRegNames[] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
"p0", "p1", "p2", "p3",
"sa0", "lc0", "sa1", "lc1", "m0", "m1", "usr", "ugp"
};
void HexagonTargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
const TargetInfo::GCCRegAlias HexagonTargetInfo::GCCRegAliases[] = {
{ { "sp" }, "r29" },
{ { "fp" }, "r30" },
{ { "lr" }, "r31" },
};
void HexagonTargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
const Builtin::Info HexagonTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsHexagon.def"
};
}
namespace {
// Shared base class for SPARC v8 (32-bit) and SPARC v9 (64-bit).
class SparcTargetInfo : public TargetInfo {
static const TargetInfo::GCCRegAlias GCCRegAliases[];
static const char * const GCCRegNames[];
bool SoftFloat;
public:
SparcTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {}
virtual bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
SoftFloat = false;
for (unsigned i = 0, e = Features.size(); i != e; ++i)
if (Features[i] == "+soft-float")
SoftFloat = true;
return true;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "sparc", Opts);
Builder.defineMacro("__REGISTER_PREFIX__", "");
if (SoftFloat)
Builder.defineMacro("SOFT_FLOAT", "1");
}
virtual bool hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("softfloat", SoftFloat)
.Case("sparc", true)
.Default(false);
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
// FIXME: Implement!
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const {
// FIXME: Implement!
return false;
}
virtual const char *getClobbers() const {
// FIXME: Implement!
return "";
}
};
const char * const SparcTargetInfo::GCCRegNames[] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
};
void SparcTargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
const TargetInfo::GCCRegAlias SparcTargetInfo::GCCRegAliases[] = {
{ { "g0" }, "r0" },
{ { "g1" }, "r1" },
{ { "g2" }, "r2" },
{ { "g3" }, "r3" },
{ { "g4" }, "r4" },
{ { "g5" }, "r5" },
{ { "g6" }, "r6" },
{ { "g7" }, "r7" },
{ { "o0" }, "r8" },
{ { "o1" }, "r9" },
{ { "o2" }, "r10" },
{ { "o3" }, "r11" },
{ { "o4" }, "r12" },
{ { "o5" }, "r13" },
{ { "o6", "sp" }, "r14" },
{ { "o7" }, "r15" },
{ { "l0" }, "r16" },
{ { "l1" }, "r17" },
{ { "l2" }, "r18" },
{ { "l3" }, "r19" },
{ { "l4" }, "r20" },
{ { "l5" }, "r21" },
{ { "l6" }, "r22" },
{ { "l7" }, "r23" },
{ { "i0" }, "r24" },
{ { "i1" }, "r25" },
{ { "i2" }, "r26" },
{ { "i3" }, "r27" },
{ { "i4" }, "r28" },
{ { "i5" }, "r29" },
{ { "i6", "fp" }, "r30" },
{ { "i7" }, "r31" },
};
void SparcTargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
// SPARC v8 is the 32-bit mode selected by Triple::sparc.
class SparcV8TargetInfo : public SparcTargetInfo {
public:
SparcV8TargetInfo(const llvm::Triple &Triple) : SparcTargetInfo(Triple) {
// FIXME: Support Sparc quad-precision long double?
DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
SparcTargetInfo::getTargetDefines(Opts, Builder);
Builder.defineMacro("__sparcv8");
}
};
// SPARC v9 is the 64-bit mode selected by Triple::sparcv9.
class SparcV9TargetInfo : public SparcTargetInfo {
public:
SparcV9TargetInfo(const llvm::Triple &Triple) : SparcTargetInfo(Triple) {
// FIXME: Support Sparc quad-precision long double?
DescriptionString = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32:64-S128";
// This is an LP64 platform.
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
// OpenBSD uses long long for int64_t and intmax_t.
if (getTriple().getOS() == llvm::Triple::OpenBSD) {
IntMaxType = SignedLongLong;
UIntMaxType = UnsignedLongLong;
} else {
IntMaxType = SignedLong;
UIntMaxType = UnsignedLong;
}
Int64Type = IntMaxType;
// The SPARCv8 System V ABI makes long double 128 bits wide but only 64-bit
// aligned; the SPARCv9 SCD 2.4.1 requires 16-byte (128-bit) alignment.
LongDoubleWidth = 128;
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
SparcTargetInfo::getTargetDefines(Opts, Builder);
Builder.defineMacro("__sparcv9");
Builder.defineMacro("__arch64__");
// Solaris and its derivative AuroraUX don't need these variants, but the
// BSDs do.
if (getTriple().getOS() != llvm::Triple::Solaris &&
getTriple().getOS() != llvm::Triple::AuroraUX) {
Builder.defineMacro("__sparc64__");
Builder.defineMacro("__sparc_v9__");
Builder.defineMacro("__sparcv9__");
}
}
virtual bool setCPU(const std::string &Name) {
bool CPUKnown = llvm::StringSwitch<bool>(Name)
.Case("v9", true)
.Case("ultrasparc", true)
.Case("ultrasparc3", true)
.Case("niagara", true)
.Case("niagara2", true)
.Case("niagara3", true)
.Case("niagara4", true)
.Default(false);
// No need to store the CPU yet. There aren't any CPU-specific
// macros to define.
return CPUKnown;
}
};
} // end anonymous namespace.
namespace {
class AuroraUXSparcV8TargetInfo : public AuroraUXTargetInfo<SparcV8TargetInfo> {
public:
AuroraUXSparcV8TargetInfo(const llvm::Triple &Triple)
: AuroraUXTargetInfo<SparcV8TargetInfo>(Triple) {
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
}
};
class SolarisSparcV8TargetInfo : public SolarisTargetInfo<SparcV8TargetInfo> {
public:
SolarisSparcV8TargetInfo(const llvm::Triple &Triple)
: SolarisTargetInfo<SparcV8TargetInfo>(Triple) {
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
}
};
} // end anonymous namespace.
namespace {
class SystemZTargetInfo : public TargetInfo {
static const char *const GCCRegNames[];
public:
SystemZTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
TLSSupported = true;
IntWidth = IntAlign = 32;
LongWidth = LongLongWidth = LongAlign = LongLongAlign = 64;
PointerWidth = PointerAlign = 64;
LongDoubleWidth = 128;
LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEquad;
MinGlobalAlign = 16;
DescriptionString = "E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64"
"-f32:32-f64:64-f128:64-a0:8:16-n32:64";
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__s390__");
Builder.defineMacro("__s390x__");
Builder.defineMacro("__zarch__");
Builder.defineMacro("__LONG_DOUBLE_128__");
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
// FIXME: Implement.
Records = 0;
NumRecords = 0;
}
virtual void getGCCRegNames(const char *const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
// No aliases.
Aliases = 0;
NumAliases = 0;
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const;
virtual const char *getClobbers() const {
// FIXME: Is this really right?
return "";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::SystemZBuiltinVaList;
}
virtual bool setCPU(const std::string &Name) {
bool CPUKnown = llvm::StringSwitch<bool>(Name)
.Case("z10", true)
.Case("z196", true)
.Case("zEC12", true)
.Default(false);
// No need to store the CPU yet. There aren't any CPU-specific
// macros to define.
return CPUKnown;
}
};
const char *const SystemZTargetInfo::GCCRegNames[] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"f0", "f2", "f4", "f6", "f1", "f3", "f5", "f7",
"f8", "f10", "f12", "f14", "f9", "f11", "f13", "f15"
};
void SystemZTargetInfo::getGCCRegNames(const char *const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
bool SystemZTargetInfo::
validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default:
return false;
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'f': // Floating-point register
Info.setAllowsRegister();
return true;
case 'I': // Unsigned 8-bit constant
case 'J': // Unsigned 12-bit constant
case 'K': // Signed 16-bit constant
case 'L': // Signed 20-bit displacement (on all targets we support)
case 'M': // 0x7fffffff
return true;
case 'Q': // Memory with base and unsigned 12-bit displacement
case 'R': // Likewise, plus an index
case 'S': // Memory with base and signed 20-bit displacement
case 'T': // Likewise, plus an index
Info.setAllowsMemory();
return true;
}
}
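// Illustrative sketch (hypothetical user code): the letters accepted above
// are GCC inline-assembly constraints, e.g. 'd' for a data register and 'K'
// for a signed 16-bit immediate.
#if 0
int add42(int x) {
  int r;
  __asm__("ahi %0,%2" : "=d"(r) : "0"(x), "K"(42));  // add halfword immediate
  return r;
}
#endif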
}
namespace {
class MSP430TargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
public:
MSP430TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
TLSSupported = false;
IntWidth = 16; IntAlign = 16;
LongWidth = 32; LongLongWidth = 64;
LongAlign = LongLongAlign = 16;
PointerWidth = 16; PointerAlign = 16;
SuitableAlign = 16;
SizeType = UnsignedInt;
IntMaxType = SignedLongLong;
UIntMaxType = UnsignedLongLong;
IntPtrType = SignedInt;
PtrDiffType = SignedInt;
SigAtomicType = SignedLong;
DescriptionString = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("MSP430");
Builder.defineMacro("__MSP430__");
// FIXME: defines for different 'flavours' of MCU
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
// FIXME: Implement.
Records = 0;
NumRecords = 0;
}
virtual bool hasFeature(StringRef Feature) const {
return Feature == "msp430";
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
// No aliases.
Aliases = 0;
NumAliases = 0;
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const {
// No target constraints for now.
return false;
}
virtual const char *getClobbers() const {
// FIXME: Is this really right?
return "";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
// FIXME: implement
return TargetInfo::CharPtrBuiltinVaList;
}
};
const char * const MSP430TargetInfo::GCCRegNames[] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};
void MSP430TargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
}
namespace {
// LLVM and Clang cannot be used directly to emit native binaries for the
// TCE target, but they are used to compile C code into LLVM bitcode with
// the correct type and alignment information.
//
// TCE takes the LLVM bitcode as input and uses it to generate a customized
// target processor and program binary. The TCE co-design environment is
// publicly available at http://tce.cs.tut.fi
static const unsigned TCEOpenCLAddrSpaceMap[] = {
3, // opencl_global
4, // opencl_local
5, // opencl_constant
0, // cuda_device
0, // cuda_constant
0 // cuda_shared
};
class TCETargetInfo : public TargetInfo{
public:
TCETargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
TLSSupported = false;
IntWidth = 32;
LongWidth = LongLongWidth = 32;
PointerWidth = 32;
IntAlign = 32;
LongAlign = LongLongAlign = 32;
PointerAlign = 32;
SuitableAlign = 32;
SizeType = UnsignedInt;
IntMaxType = SignedLong;
UIntMaxType = UnsignedLong;
IntPtrType = SignedInt;
PtrDiffType = SignedInt;
FloatWidth = 32;
FloatAlign = 32;
DoubleWidth = 32;
DoubleAlign = 32;
LongDoubleWidth = 32;
LongDoubleAlign = 32;
FloatFormat = &llvm::APFloat::IEEEsingle;
DoubleFormat = &llvm::APFloat::IEEEsingle;
LongDoubleFormat = &llvm::APFloat::IEEEsingle;
DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-"
"f32:32:32-f64:32:32-v64:32:32-"
"v128:32:32-a0:0:32-n32";
AddrSpaceMap = &TCEOpenCLAddrSpaceMap;
UseAddrSpaceMapMangling = true;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "tce", Opts);
Builder.defineMacro("__TCE__");
Builder.defineMacro("__TCE_V1__");
}
virtual bool hasFeature(StringRef Feature) const {
return Feature == "tce";
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {}
virtual const char *getClobbers() const {
return "";
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const {
return true;
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {}
};
}
namespace {
class MipsTargetInfoBase : public TargetInfo {
virtual void setDescriptionString() = 0;
static const Builtin::Info BuiltinInfo[];
std::string CPU;
bool IsMips16;
bool IsMicromips;
bool IsNan2008;
bool IsSingleFloat;
enum MipsFloatABI {
HardFloat, SoftFloat
} FloatABI;
enum DspRevEnum {
NoDSP, DSP1, DSP2
} DspRev;
bool HasMSA;
protected:
bool HasFP64;
std::string ABI;
public:
MipsTargetInfoBase(const llvm::Triple &Triple, const std::string &ABIStr,
const std::string &CPUStr)
: TargetInfo(Triple), CPU(CPUStr), IsMips16(false), IsMicromips(false),
IsNan2008(false), IsSingleFloat(false), FloatABI(HardFloat),
DspRev(NoDSP), HasMSA(false), HasFP64(false), ABI(ABIStr) {}
virtual const char *getABI() const { return ABI.c_str(); }
virtual bool setABI(const std::string &Name) = 0;
virtual bool setCPU(const std::string &Name) {
CPU = Name;
return true;
}
void getDefaultFeatures(llvm::StringMap<bool> &Features) const {
Features[ABI] = true;
Features[CPU] = true;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "mips", Opts);
Builder.defineMacro("_mips");
Builder.defineMacro("__REGISTER_PREFIX__", "");
switch (FloatABI) {
case HardFloat:
Builder.defineMacro("__mips_hard_float", Twine(1));
break;
case SoftFloat:
Builder.defineMacro("__mips_soft_float", Twine(1));
break;
}
if (IsSingleFloat)
Builder.defineMacro("__mips_single_float", Twine(1));
Builder.defineMacro("__mips_fpr", HasFP64 ? Twine(64) : Twine(32));
Builder.defineMacro("_MIPS_FPSET",
Twine(32 / (HasFP64 || IsSingleFloat ? 1 : 2)));
if (IsMips16)
Builder.defineMacro("__mips16", Twine(1));
if (IsMicromips)
Builder.defineMacro("__mips_micromips", Twine(1));
if (IsNan2008)
Builder.defineMacro("__mips_nan2008", Twine(1));
switch (DspRev) {
default:
break;
case DSP1:
Builder.defineMacro("__mips_dsp_rev", Twine(1));
Builder.defineMacro("__mips_dsp", Twine(1));
break;
case DSP2:
Builder.defineMacro("__mips_dsp_rev", Twine(2));
Builder.defineMacro("__mips_dspr2", Twine(1));
Builder.defineMacro("__mips_dsp", Twine(1));
break;
}
if (HasMSA)
Builder.defineMacro("__mips_msa", Twine(1));
Builder.defineMacro("_MIPS_SZPTR", Twine(getPointerWidth(0)));
Builder.defineMacro("_MIPS_SZINT", Twine(getIntWidth()));
Builder.defineMacro("_MIPS_SZLONG", Twine(getLongWidth()));
Builder.defineMacro("_MIPS_ARCH", "\"" + CPU + "\"");
Builder.defineMacro("_MIPS_ARCH_" + StringRef(CPU).upper());
}
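// Illustrative note (hypothetical user code): MIPS sources typically key off
// the macros defined above, e.g.
//
//   #if defined(__mips_hard_float) && __mips_fpr == 64
//   /* a 64-bit FP register file is available */
//   #endif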
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::Mips::LastTSBuiltin - Builtin::FirstTSBuiltin;
}
virtual bool hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("mips", true)
.Case("fp64", HasFP64)
.Default(false);
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
static const char *const GCCRegNames[] = {
// CPU register names
// Must match second column of GCCRegAliases
"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
"$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15",
"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
"$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31",
// Floating point register names
"$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7",
"$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15",
"$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",
"$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
// Hi/lo and condition register names
"hi", "lo", "", "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4",
"$fcc5","$fcc6","$fcc7",
// MSA register names
"$w0", "$w1", "$w2", "$w3", "$w4", "$w5", "$w6", "$w7",
"$w8", "$w9", "$w10", "$w11", "$w12", "$w13", "$w14", "$w15",
"$w16", "$w17", "$w18", "$w19", "$w20", "$w21", "$w22", "$w23",
"$w24", "$w25", "$w26", "$w27", "$w28", "$w29", "$w30", "$w31",
// MSA control register names
"$msair", "$msacsr", "$msaaccess", "$msasave", "$msamodify",
"$msarequest", "$msamap", "$msaunmap"
};
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const = 0;
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default:
return false;
case 'r': // CPU registers.
case 'd': // Equivalent to "r" unless generating MIPS16 code.
case 'y': // Equivalent to "r", backwards compatibility only.
case 'f': // floating-point registers.
case 'c': // $25 for indirect jumps
case 'l': // lo register
case 'x': // hilo register pair
Info.setAllowsRegister();
return true;
case 'R': // An address that can be used in a non-macro load or store
Info.setAllowsMemory();
return true;
}
}
virtual const char *getClobbers() const {
// FIXME: Implement!
return "";
}
virtual bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
IsMips16 = false;
IsMicromips = false;
IsNan2008 = false;
IsSingleFloat = false;
FloatABI = HardFloat;
DspRev = NoDSP;
HasFP64 = ABI == "n32" || ABI == "n64" || ABI == "64";
for (std::vector<std::string>::iterator it = Features.begin(),
ie = Features.end(); it != ie; ++it) {
if (*it == "+single-float")
IsSingleFloat = true;
else if (*it == "+soft-float")
FloatABI = SoftFloat;
else if (*it == "+mips16")
IsMips16 = true;
else if (*it == "+micromips")
IsMicromips = true;
else if (*it == "+dsp")
DspRev = std::max(DspRev, DSP1);
else if (*it == "+dspr2")
DspRev = std::max(DspRev, DSP2);
else if (*it == "+msa")
HasMSA = true;
else if (*it == "+fp64")
HasFP64 = true;
else if (*it == "-fp64")
HasFP64 = false;
else if (*it == "+nan2008")
IsNan2008 = true;
}
// Remove front-end specific options.
std::vector<std::string>::iterator it =
std::find(Features.begin(), Features.end(), "+soft-float");
if (it != Features.end())
Features.erase(it);
it = std::find(Features.begin(), Features.end(), "+nan2008");
if (it != Features.end())
Features.erase(it);
setDescriptionString();
return true;
}
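// Illustrative note (hypothetical input): given the driver-assembled feature
// strings { "+mips16", "+dsp", "-fp64" }, the loop above yields
// IsMips16 = true, DspRev = DSP1 and HasFP64 = false; "+soft-float" and
// "+nan2008" would additionally be erased as front-end-only options.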
virtual int getEHDataRegisterNumber(unsigned RegNo) const {
if (RegNo == 0) return 4;
if (RegNo == 1) return 5;
return -1;
}
};
const Builtin::Info MipsTargetInfoBase::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsMips.def"
};
class Mips32TargetInfoBase : public MipsTargetInfoBase {
public:
Mips32TargetInfoBase(const llvm::Triple &Triple)
: MipsTargetInfoBase(Triple, "o32", "mips32") {
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
}
virtual bool setABI(const std::string &Name) {
if ((Name == "o32") || (Name == "eabi")) {
ABI = Name;
return true;
} else if (Name == "32") {
ABI = "o32";
return true;
} else
return false;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
MipsTargetInfoBase::getTargetDefines(Opts, Builder);
if (ABI == "o32") {
Builder.defineMacro("__mips_o32");
Builder.defineMacro("_ABIO32", "1");
Builder.defineMacro("_MIPS_SIM", "_ABIO32");
}
else if (ABI == "eabi")
Builder.defineMacro("__mips_eabi");
else
llvm_unreachable("Invalid ABI for Mips32.");
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
static const TargetInfo::GCCRegAlias GCCRegAliases[] = {
{ { "at" }, "$1" },
{ { "v0" }, "$2" },
{ { "v1" }, "$3" },
{ { "a0" }, "$4" },
{ { "a1" }, "$5" },
{ { "a2" }, "$6" },
{ { "a3" }, "$7" },
{ { "t0" }, "$8" },
{ { "t1" }, "$9" },
{ { "t2" }, "$10" },
{ { "t3" }, "$11" },
{ { "t4" }, "$12" },
{ { "t5" }, "$13" },
{ { "t6" }, "$14" },
{ { "t7" }, "$15" },
{ { "s0" }, "$16" },
{ { "s1" }, "$17" },
{ { "s2" }, "$18" },
{ { "s3" }, "$19" },
{ { "s4" }, "$20" },
{ { "s5" }, "$21" },
{ { "s6" }, "$22" },
{ { "s7" }, "$23" },
{ { "t8" }, "$24" },
{ { "t9" }, "$25" },
{ { "k0" }, "$26" },
{ { "k1" }, "$27" },
{ { "gp" }, "$28" },
{ { "sp","$sp" }, "$29" },
{ { "fp","$fp" }, "$30" },
{ { "ra" }, "$31" }
};
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
};
class Mips32EBTargetInfo : public Mips32TargetInfoBase {
virtual void setDescriptionString() {
DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64";
}
public:
Mips32EBTargetInfo(const llvm::Triple &Triple)
: Mips32TargetInfoBase(Triple) {
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "MIPSEB", Opts);
Builder.defineMacro("_MIPSEB");
Mips32TargetInfoBase::getTargetDefines(Opts, Builder);
}
};
class Mips32ELTargetInfo : public Mips32TargetInfoBase {
virtual void setDescriptionString() {
DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64";
}
public:
Mips32ELTargetInfo(const llvm::Triple &Triple)
: Mips32TargetInfoBase(Triple) {
BigEndian = false;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "MIPSEL", Opts);
Builder.defineMacro("_MIPSEL");
Mips32TargetInfoBase::getTargetDefines(Opts, Builder);
}
};
class Mips64TargetInfoBase : public MipsTargetInfoBase {
public:
Mips64TargetInfoBase(const llvm::Triple &Triple)
: MipsTargetInfoBase(Triple, "n64", "mips64") {
LongWidth = LongAlign = 64;
PointerWidth = PointerAlign = 64;
LongDoubleWidth = LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad;
if (getTriple().getOS() == llvm::Triple::FreeBSD) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
}
SuitableAlign = 128;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
virtual bool setABI(const std::string &Name) {
if (Name == "n32") {
LongWidth = LongAlign = 32;
PointerWidth = PointerAlign = 32;
ABI = Name;
return true;
} else if (Name == "n64") {
ABI = Name;
return true;
} else if (Name == "64") {
ABI = "n64";
return true;
} else
return false;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
MipsTargetInfoBase::getTargetDefines(Opts, Builder);
Builder.defineMacro("__mips64");
Builder.defineMacro("__mips64__");
if (ABI == "n32") {
Builder.defineMacro("__mips_n32");
Builder.defineMacro("_ABIN32", "2");
Builder.defineMacro("_MIPS_SIM", "_ABIN32");
}
else if (ABI == "n64") {
Builder.defineMacro("__mips_n64");
Builder.defineMacro("_ABI64", "3");
Builder.defineMacro("_MIPS_SIM", "_ABI64");
}
else
llvm_unreachable("Invalid ABI for Mips64.");
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
static const TargetInfo::GCCRegAlias GCCRegAliases[] = {
{ { "at" }, "$1" },
{ { "v0" }, "$2" },
{ { "v1" }, "$3" },
{ { "a0" }, "$4" },
{ { "a1" }, "$5" },
{ { "a2" }, "$6" },
{ { "a3" }, "$7" },
{ { "a4" }, "$8" },
{ { "a5" }, "$9" },
{ { "a6" }, "$10" },
{ { "a7" }, "$11" },
{ { "t0" }, "$12" },
{ { "t1" }, "$13" },
{ { "t2" }, "$14" },
{ { "t3" }, "$15" },
{ { "s0" }, "$16" },
{ { "s1" }, "$17" },
{ { "s2" }, "$18" },
{ { "s3" }, "$19" },
{ { "s4" }, "$20" },
{ { "s5" }, "$21" },
{ { "s6" }, "$22" },
{ { "s7" }, "$23" },
{ { "t8" }, "$24" },
{ { "t9" }, "$25" },
{ { "k0" }, "$26" },
{ { "k1" }, "$27" },
{ { "gp" }, "$28" },
{ { "sp","$sp" }, "$29" },
{ { "fp","$fp" }, "$30" },
{ { "ra" }, "$31" }
};
Aliases = GCCRegAliases;
NumAliases = llvm::array_lengthof(GCCRegAliases);
}
};
class Mips64EBTargetInfo : public Mips64TargetInfoBase {
virtual void setDescriptionString() {
if (ABI == "n32")
DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-f128:128:128-"
"v64:64:64-n32:64-S128";
else
DescriptionString = "E-p:64:64:64-i1:8:8-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-f128:128:128-"
"v64:64:64-n32:64-S128";
}
public:
Mips64EBTargetInfo(const llvm::Triple &Triple)
: Mips64TargetInfoBase(Triple) {}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "MIPSEB", Opts);
Builder.defineMacro("_MIPSEB");
Mips64TargetInfoBase::getTargetDefines(Opts, Builder);
}
};
class Mips64ELTargetInfo : public Mips64TargetInfoBase {
virtual void setDescriptionString() {
if (ABI == "n32")
DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-f128:128:128"
"-v64:64:64-n32:64-S128";
else
DescriptionString = "e-p:64:64:64-i1:8:8-i8:8:32-i16:16:32-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-f128:128:128-"
"v64:64:64-n32:64-S128";
}
public:
Mips64ELTargetInfo(const llvm::Triple &Triple)
: Mips64TargetInfoBase(Triple) {
// The default ABI (n64) comes from the base class; only endianness differs.
BigEndian = false;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "MIPSEL", Opts);
Builder.defineMacro("_MIPSEL");
Mips64TargetInfoBase::getTargetDefines(Opts, Builder);
}
};
} // end anonymous namespace.
namespace {
class PNaClTargetInfo : public TargetInfo {
public:
PNaClTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
this->UserLabelPrefix = "";
this->LongAlign = 32;
this->LongWidth = 32;
this->PointerAlign = 32;
this->PointerWidth = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->UIntMaxType = TargetInfo::UnsignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->DoubleAlign = 64;
this->LongDoubleWidth = 64;
this->LongDoubleAlign = 64;
this->SizeType = TargetInfo::UnsignedInt;
this->PtrDiffType = TargetInfo::SignedInt;
this->IntPtrType = TargetInfo::SignedInt;
this->RegParmMax = 0; // Disallow regparm
DescriptionString = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
"f32:32:32-f64:64:64-p:32:32:32-v128:32:32";
}
void getDefaultFeatures(llvm::StringMap<bool> &Features) const {
}
virtual void getArchDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__le32__");
Builder.defineMacro("__pnacl__");
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__LITTLE_ENDIAN__");
getArchDefines(Opts, Builder);
}
virtual bool hasFeature(StringRef Feature) const {
return Feature == "pnacl";
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::PNaClABIBuiltinVaList;
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
return false;
}
virtual const char *getClobbers() const {
return "";
}
};
void PNaClTargetInfo::getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
Names = NULL;
NumNames = 0;
}
void PNaClTargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = NULL;
NumAliases = 0;
}
} // end anonymous namespace.
namespace {
static const unsigned SPIRAddrSpaceMap[] = {
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // cuda_device
0, // cuda_constant
0 // cuda_shared
};
class SPIRTargetInfo : public TargetInfo {
public:
SPIRTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
"SPIR target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
"SPIR target must use unknown environment type");
BigEndian = false;
TLSSupported = false;
LongWidth = LongAlign = 64;
AddrSpaceMap = &SPIRAddrSpaceMap;
UseAddrSpaceMapMangling = true;
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "SPIR", Opts);
}
virtual bool hasFeature(StringRef Feature) const {
return Feature == "spir";
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {}
virtual const char *getClobbers() const {
return "";
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const {
return true;
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
};
class SPIR32TargetInfo : public SPIRTargetInfo {
public:
SPIR32TargetInfo(const llvm::Triple &Triple) : SPIRTargetInfo(Triple) {
PointerWidth = PointerAlign = 32;
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
DescriptionString
= "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
"f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-"
"v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-"
"v512:512:512-v1024:1024:1024";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "SPIR32", Opts);
}
};
class SPIR64TargetInfo : public SPIRTargetInfo {
public:
SPIR64TargetInfo(const llvm::Triple &Triple) : SPIRTargetInfo(Triple) {
PointerWidth = PointerAlign = 64;
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
DescriptionString
= "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
"f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-"
"v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-"
"v512:512:512-v1024:1024:1024";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "SPIR64", Opts);
}
};
}
namespace {
class XCoreTargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
public:
XCoreTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
NoAsmVariants = true;
LongLongAlign = 32;
SuitableAlign = 32;
DoubleAlign = LongDoubleAlign = 32;
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
WCharType = UnsignedChar;
WIntType = UnsignedInt;
UseZeroLengthBitfieldAlignment = true;
DescriptionString = "e-p:32:32:32-a0:0:32-n32"
"-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32"
"-f16:16:32-f32:32:32-f64:32:32";
}
virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__XS1B__");
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::XCore::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual BuiltinVaListKind getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
virtual const char *getClobbers() const {
return "";
}
virtual void getGCCRegNames(const char * const *&Names,
unsigned &NumNames) const {
static const char * const GCCRegNames[] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "cp", "dp", "sp", "lr"
};
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const {
Aliases = NULL;
NumAliases = 0;
}
virtual bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const {
return false;
}
};
const Builtin::Info XCoreTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
ALL_LANGUAGES },
#include "clang/Basic/BuiltinsXCore.def"
};
} // end anonymous namespace.
//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//
static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
llvm::Triple::OSType os = Triple.getOS();
switch (Triple.getArch()) {
default:
return NULL;
case llvm::Triple::xcore:
return new XCoreTargetInfo(Triple);
case llvm::Triple::hexagon:
return new HexagonTargetInfo(Triple);
case llvm::Triple::aarch64:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<AArch64TargetInfo>(Triple);
default:
return new AArch64TargetInfo(Triple);
}
case llvm::Triple::arm:
case llvm::Triple::thumb:
if (Triple.isOSDarwin())
return new DarwinARMTargetInfo(Triple);
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<ARMTargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<ARMTargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<ARMTargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDTargetInfo<ARMTargetInfo>(Triple);
case llvm::Triple::Bitrig:
return new BitrigTargetInfo<ARMTargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<ARMTargetInfo>(Triple);
case llvm::Triple::NaCl:
return new NaClTargetInfo<ARMTargetInfo>(Triple);
default:
return new ARMTargetInfo(Triple);
}
case llvm::Triple::msp430:
return new MSP430TargetInfo(Triple);
case llvm::Triple::mips:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<Mips32EBTargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<Mips32EBTargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<Mips32EBTargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<Mips32EBTargetInfo>(Triple);
default:
return new Mips32EBTargetInfo(Triple);
}
case llvm::Triple::mipsel:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<Mips32ELTargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<Mips32ELTargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<Mips32ELTargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<Mips32ELTargetInfo>(Triple);
case llvm::Triple::NaCl:
return new NaClTargetInfo<Mips32ELTargetInfo>(Triple);
default:
return new Mips32ELTargetInfo(Triple);
}
case llvm::Triple::mips64:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<Mips64EBTargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<Mips64EBTargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<Mips64EBTargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<Mips64EBTargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDTargetInfo<Mips64EBTargetInfo>(Triple);
default:
return new Mips64EBTargetInfo(Triple);
}
case llvm::Triple::mips64el:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<Mips64ELTargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<Mips64ELTargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<Mips64ELTargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<Mips64ELTargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDTargetInfo<Mips64ELTargetInfo>(Triple);
default:
return new Mips64ELTargetInfo(Triple);
}
case llvm::Triple::le32:
switch (os) {
case llvm::Triple::NaCl:
return new NaClTargetInfo<PNaClTargetInfo>(Triple);
default:
return NULL;
}
case llvm::Triple::ppc:
if (Triple.isOSDarwin())
return new DarwinPPC32TargetInfo(Triple);
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<PPC32TargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<PPC32TargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<PPC32TargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDTargetInfo<PPC32TargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<PPC32TargetInfo>(Triple);
default:
return new PPC32TargetInfo(Triple);
}
case llvm::Triple::ppc64:
if (Triple.isOSDarwin())
return new DarwinPPC64TargetInfo(Triple);
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<PPC64TargetInfo>(Triple);
case llvm::Triple::Lv2:
return new PS3PPUTargetInfo<PPC64TargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<PPC64TargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<PPC64TargetInfo>(Triple);
default:
return new PPC64TargetInfo(Triple);
}
case llvm::Triple::ppc64le:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<PPC64TargetInfo>(Triple);
default:
return new PPC64TargetInfo(Triple);
}
case llvm::Triple::nvptx:
return new NVPTX32TargetInfo(Triple);
case llvm::Triple::nvptx64:
return new NVPTX64TargetInfo(Triple);
case llvm::Triple::r600:
return new R600TargetInfo(Triple);
case llvm::Triple::sparc:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<SparcV8TargetInfo>(Triple);
case llvm::Triple::AuroraUX:
return new AuroraUXSparcV8TargetInfo(Triple);
case llvm::Triple::Solaris:
return new SolarisSparcV8TargetInfo(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<SparcV8TargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDTargetInfo<SparcV8TargetInfo>(Triple);
case llvm::Triple::RTEMS:
return new RTEMSTargetInfo<SparcV8TargetInfo>(Triple);
default:
return new SparcV8TargetInfo(Triple);
}
case llvm::Triple::sparcv9:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<SparcV9TargetInfo>(Triple);
case llvm::Triple::AuroraUX:
return new AuroraUXTargetInfo<SparcV9TargetInfo>(Triple);
case llvm::Triple::Solaris:
return new SolarisTargetInfo<SparcV9TargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<SparcV9TargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDTargetInfo<SparcV9TargetInfo>(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<SparcV9TargetInfo>(Triple);
default:
return new SparcV9TargetInfo(Triple);
}
case llvm::Triple::systemz:
switch (os) {
case llvm::Triple::Linux:
return new LinuxTargetInfo<SystemZTargetInfo>(Triple);
default:
return new SystemZTargetInfo(Triple);
}
case llvm::Triple::tce:
return new TCETargetInfo(Triple);
case llvm::Triple::x86:
if (Triple.isOSDarwin())
return new DarwinI386TargetInfo(Triple);
switch (os) {
case llvm::Triple::AuroraUX:
return new AuroraUXTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::Linux:
return new LinuxTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::DragonFly:
return new DragonFlyBSDTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDI386TargetInfo(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDI386TargetInfo(Triple);
case llvm::Triple::Bitrig:
return new BitrigI386TargetInfo(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::KFreeBSD:
return new KFreeBSDTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::Minix:
return new MinixTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::Solaris:
return new SolarisTargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::Cygwin:
return new CygwinX86_32TargetInfo(Triple);
case llvm::Triple::MinGW32:
return new MinGWX86_32TargetInfo(Triple);
case llvm::Triple::Win32:
return new VisualStudioWindowsX86_32TargetInfo(Triple);
case llvm::Triple::Haiku:
return new HaikuX86_32TargetInfo(Triple);
case llvm::Triple::RTEMS:
return new RTEMSX86_32TargetInfo(Triple);
case llvm::Triple::NaCl:
return new NaClTargetInfo<X86_32TargetInfo>(Triple);
default:
return new X86_32TargetInfo(Triple);
}
case llvm::Triple::x86_64:
if (Triple.isOSDarwin() || Triple.getEnvironment() == llvm::Triple::MachO)
return new DarwinX86_64TargetInfo(Triple);
switch (os) {
case llvm::Triple::AuroraUX:
return new AuroraUXTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::Linux:
return new LinuxTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::DragonFly:
return new DragonFlyBSDTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::NetBSD:
return new NetBSDTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::OpenBSD:
return new OpenBSDX86_64TargetInfo(Triple);
case llvm::Triple::Bitrig:
return new BitrigX86_64TargetInfo(Triple);
case llvm::Triple::FreeBSD:
return new FreeBSDTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::KFreeBSD:
return new KFreeBSDTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::Solaris:
return new SolarisTargetInfo<X86_64TargetInfo>(Triple);
case llvm::Triple::MinGW32:
return new MinGWX86_64TargetInfo(Triple);
case llvm::Triple::Win32: // This is what Triple.h supports now.
return new VisualStudioWindowsX86_64TargetInfo(Triple);
case llvm::Triple::NaCl:
return new NaClTargetInfo<X86_64TargetInfo>(Triple);
default:
return new X86_64TargetInfo(Triple);
}
case llvm::Triple::spir: {
if (Triple.getOS() != llvm::Triple::UnknownOS ||
Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
return NULL;
return new SPIR32TargetInfo(Triple);
}
case llvm::Triple::spir64: {
if (Triple.getOS() != llvm::Triple::UnknownOS ||
Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
return NULL;
return new SPIR64TargetInfo(Triple);
}
}
}
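// Illustrative sketch (hypothetical usage): AllocateTarget() keys entirely
// off the triple, so OS-specialised TargetInfos fall out of the switch above.
#if 0
llvm::Triple T("x86_64-unknown-freebsd10.0");
TargetInfo *TI = AllocateTarget(T);  // FreeBSDTargetInfo<X86_64TargetInfo>
#endif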
/// CreateTargetInfo - Return the target info object for the specified target
/// triple.
TargetInfo *TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
TargetOptions *Opts) {
llvm::Triple Triple(Opts->Triple);
// Construct the target
OwningPtr<TargetInfo> Target(AllocateTarget(Triple));
if (!Target) {
Diags.Report(diag::err_target_unknown_triple) << Triple.str();
return 0;
}
Target->setTargetOpts(Opts);
// Set the target CPU if specified.
if (!Opts->CPU.empty() && !Target->setCPU(Opts->CPU)) {
Diags.Report(diag::err_target_unknown_cpu) << Opts->CPU;
return 0;
}
// Set the target ABI if specified.
if (!Opts->ABI.empty() && !Target->setABI(Opts->ABI)) {
Diags.Report(diag::err_target_unknown_abi) << Opts->ABI;
return 0;
}
// Set the target C++ ABI.
if (!Opts->CXXABI.empty() && !Target->setCXXABI(Opts->CXXABI)) {
Diags.Report(diag::err_target_unknown_cxxabi) << Opts->CXXABI;
return 0;
}
// Set the fp math unit.
if (!Opts->FPMath.empty() && !Target->setFPMath(Opts->FPMath)) {
Diags.Report(diag::err_target_unknown_fpmath) << Opts->FPMath;
return 0;
}
// Compute the default target features; we need the target to handle this
// because features may have dependencies on one another.
llvm::StringMap<bool> Features;
Target->getDefaultFeatures(Features);
// Apply the user specified deltas.
for (unsigned I = 0, N = Opts->FeaturesAsWritten.size();
I < N; ++I) {
const char *Name = Opts->FeaturesAsWritten[I].c_str();
// Apply the feature via the target.
bool Enabled = Name[0] == '+';
Target->setFeatureEnabled(Features, Name + 1, Enabled);
}
// Add the features to the compile options.
//
// FIXME: If we are completely confident that we have the right set, we only
// need to pass the minuses.
Opts->Features.clear();
for (llvm::StringMap<bool>::const_iterator it = Features.begin(),
ie = Features.end(); it != ie; ++it)
Opts->Features.push_back((it->second ? "+" : "-") + it->first().str());
if (!Target->handleTargetFeatures(Opts->Features, Diags))
return 0;
return Target.take();
}
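// Illustrative sketch (hypothetical usage; Diags is assumed to exist):
// callers hand in parsed TargetOptions and own the result; a null return
// means the triple, CPU, ABI, C++ ABI or FP math unit was rejected and a
// diagnostic has already been emitted.
#if 0
TargetOptions *Opts = new TargetOptions();
Opts->Triple = "sparcv9--netbsd";
Opts->CPU = "ultrasparc";
OwningPtr<TargetInfo> Target(TargetInfo::CreateTargetInfo(Diags, Opts));
#endif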
Index: head/contrib/llvm/tools/clang/lib/Basic/Version.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Basic/Version.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Basic/Version.cpp (revision 265925)
@@ -1,149 +1,149 @@
//===- Version.cpp - Clang Version Number -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines several version-related utility functions for Clang.
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/Version.h"
#include "clang/Basic/LLVM.h"
#include "llvm/Config/config.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <cstring>
#ifdef HAVE_SVN_VERSION_INC
# include "SVNVersion.inc"
#endif
namespace clang {
std::string getClangRepositoryPath() {
#if defined(CLANG_REPOSITORY_STRING)
return CLANG_REPOSITORY_STRING;
#else
#ifdef SVN_REPOSITORY
StringRef URL(SVN_REPOSITORY);
#else
StringRef URL("");
#endif
// If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us
// pick up a tag in an SVN export, for example.
- StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_34/final/lib/Basic/Version.cpp $");
+ StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_34/dot1-final/lib/Basic/Version.cpp $");
if (URL.empty()) {
URL = SVNRepository.slice(SVNRepository.find(':'),
SVNRepository.find("/lib/Basic"));
}
// Strip off version from a build from an integration branch.
URL = URL.slice(0, URL.find("/src/tools/clang"));
// Trim path prefix off, assuming path came from standard cfe path.
size_t Start = URL.find("cfe/");
if (Start != StringRef::npos)
URL = URL.substr(Start + 4);
return URL;
#endif
}
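// Worked example: for the tag URL embedded above, the slice keeps everything
// up to "/lib/Basic", the "/src/tools/clang" trim is a no-op, and substr()
// drops the leading "cfe/", leaving "tags/RELEASE_34/dot1-final".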
std::string getLLVMRepositoryPath() {
#ifdef LLVM_REPOSITORY
StringRef URL(LLVM_REPOSITORY);
#else
StringRef URL("");
#endif
// Trim path prefix off, assuming path came from standard llvm path.
// Leave "llvm/" prefix to distinguish the following llvm revision from the
// clang revision.
size_t Start = URL.find("llvm/");
if (Start != StringRef::npos)
URL = URL.substr(Start);
return URL;
}
std::string getClangRevision() {
#ifdef SVN_REVISION
return SVN_REVISION;
#else
return "";
#endif
}
std::string getLLVMRevision() {
#ifdef LLVM_REVISION
return LLVM_REVISION;
#else
return "";
#endif
}
std::string getClangFullRepositoryVersion() {
std::string buf;
llvm::raw_string_ostream OS(buf);
std::string Path = getClangRepositoryPath();
std::string Revision = getClangRevision();
if (!Path.empty() || !Revision.empty()) {
OS << '(';
if (!Path.empty())
OS << Path;
if (!Revision.empty()) {
if (!Path.empty())
OS << ' ';
OS << Revision;
}
OS << ')';
}
// Support LLVM in a separate repository.
std::string LLVMRev = getLLVMRevision();
if (!LLVMRev.empty() && LLVMRev != Revision) {
OS << " (";
std::string LLVMRepo = getLLVMRepositoryPath();
if (!LLVMRepo.empty())
OS << LLVMRepo << ' ';
OS << LLVMRev << ')';
}
return OS.str();
}
std::string getClangFullVersion() {
std::string buf;
llvm::raw_string_ostream OS(buf);
#ifdef CLANG_VENDOR
OS << CLANG_VENDOR;
#endif
OS << "clang version " CLANG_VERSION_STRING " "
<< getClangFullRepositoryVersion();
#ifdef CLANG_VENDOR_SUFFIX
OS << CLANG_VENDOR_SUFFIX;
#elif defined(CLANG_VENDOR)
// If vendor supplied, include the base LLVM version as well.
OS << " (based on LLVM " << PACKAGE_VERSION << ")";
#endif
return OS.str();
}
std::string getClangFullCPPVersion() {
// The version string we report in __VERSION__ is just a compacted version of
// the one we report on the command line.
std::string buf;
llvm::raw_string_ostream OS(buf);
#ifdef CLANG_VENDOR
OS << CLANG_VENDOR;
#endif
OS << "Clang " CLANG_VERSION_STRING " " << getClangFullRepositoryVersion();
return OS.str();
}
} // end namespace clang
Index: head/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/Driver.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Driver/Driver.cpp (revision 265925)
@@ -1,2057 +1,2061 @@
//===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "clang/Driver/Driver.h"
#include "InputInfo.h"
#include "ToolChains.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Option/OptSpecifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
// FIXME: Including config.h before system_error.h would prevent us from
// including llvm-config.h, so it must come after the system headers.
#include "clang/Config/config.h"
using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
Driver::Driver(StringRef ClangExecutable,
StringRef DefaultTargetTriple,
StringRef DefaultImageName,
DiagnosticsEngine &Diags)
: Opts(createDriverOptTable()), Diags(Diags), Mode(GCCMode),
ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT),
UseStdLib(true), DefaultTargetTriple(DefaultTargetTriple),
DefaultImageName(DefaultImageName),
DriverTitle("clang LLVM compiler"),
CCPrintOptionsFilename(0), CCPrintHeadersFilename(0),
CCLogDiagnosticsFilename(0),
CCCPrintBindings(false),
CCPrintHeaders(false), CCLogDiagnostics(false),
CCGenDiagnostics(false), CCCGenericGCCName(""), CheckInputsExist(true),
CCCUsePCH(true), SuppressMissingInputWarning(false) {
Name = llvm::sys::path::stem(ClangExecutable);
Dir = llvm::sys::path::parent_path(ClangExecutable);
// Compute the path to the resource directory.
StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
SmallString<128> P(Dir);
if (ClangResourceDir != "")
llvm::sys::path::append(P, ClangResourceDir);
else
llvm::sys::path::append(P, "..", "lib", "clang", CLANG_VERSION_STRING);
ResourceDir = P.str();
}
Driver::~Driver() {
delete Opts;
for (llvm::StringMap<ToolChain *>::iterator I = ToolChains.begin(),
E = ToolChains.end();
I != E; ++I)
delete I->second;
}
void Driver::ParseDriverMode(ArrayRef<const char *> Args) {
const std::string OptName =
getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
for (size_t I = 0, E = Args.size(); I != E; ++I) {
const StringRef Arg = Args[I];
if (!Arg.startswith(OptName))
continue;
const StringRef Value = Arg.drop_front(OptName.size());
const unsigned M = llvm::StringSwitch<unsigned>(Value)
.Case("gcc", GCCMode)
.Case("g++", GXXMode)
.Case("cpp", CPPMode)
.Case("cl", CLMode)
.Default(~0U);
if (M != ~0U)
Mode = static_cast<DriverMode>(M);
else
Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
}
}
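// Illustrative invocations (hypothetical): --driver-mode= is how one clang
// binary impersonates other drivers, e.g.
//
//   clang --driver-mode=g++ foo.cpp   (behave as the C++ driver)
//   clang --driver-mode=cpp foo.c     (behave as a bare preprocessor)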
InputArgList *Driver::ParseArgStrings(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
llvm::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks();
unsigned MissingArgIndex, MissingArgCount;
InputArgList *Args = getOpts().ParseArgs(ArgList.begin(), ArgList.end(),
MissingArgIndex, MissingArgCount,
IncludedFlagsBitmask,
ExcludedFlagsBitmask);
// Check for missing argument error.
if (MissingArgCount)
Diag(clang::diag::err_drv_missing_argument)
<< Args->getArgString(MissingArgIndex) << MissingArgCount;
// Check for unsupported options.
for (ArgList::const_iterator it = Args->begin(), ie = Args->end();
it != ie; ++it) {
Arg *A = *it;
if (A->getOption().hasFlag(options::Unsupported)) {
Diag(clang::diag::err_drv_unsupported_opt) << A->getAsString(*Args);
continue;
}
// Warn about -mcpu= without an argument.
if (A->getOption().matches(options::OPT_mcpu_EQ) &&
A->containsValue("")) {
Diag(clang::diag::warn_drv_empty_joined_argument) <<
A->getAsString(*Args);
}
}
for (arg_iterator it = Args->filtered_begin(options::OPT_UNKNOWN),
ie = Args->filtered_end(); it != ie; ++it) {
Diags.Report(diag::err_drv_unknown_argument) << (*it)->getAsString(*Args);
}
return Args;
}
// Determine which compilation mode we are in. We look for options which
// affect the phase, starting with the earliest phases, and record which
// option we used to determine the final phase.
phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, Arg **FinalPhaseArg)
const {
Arg *PhaseArg = 0;
phases::ID FinalPhase;
// -{E,M,MM} only run the preprocessor.
if (CCCIsCPP() ||
(PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM))) {
FinalPhase = phases::Preprocess;
// -{fsyntax-only,-analyze,emit-ast,S} only run up to the compiler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
(PhaseArg = DAL.getLastArg(options::OPT__analyze,
options::OPT__analyze_auto)) ||
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast)) ||
(PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Compile;
// -c only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
// Otherwise do everything.
} else
FinalPhase = phases::Link;
if (FinalPhaseArg)
*FinalPhaseArg = PhaseArg;
return FinalPhase;
}
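// Illustrating the precedence above: "clang -E -c foo.c" stops after
// preprocessing because -E is checked before -c, and "clang -S -c foo.c"
// stops after compilation because -S is checked before -c.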
static Arg* MakeInputArg(const DerivedArgList &Args, OptTable *Opts,
StringRef Value) {
Arg *A = new Arg(Opts->getOption(options::OPT_INPUT), Value,
Args.getBaseArgs().MakeIndex(Value), Value.data());
A->claim();
return A;
}
DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
DerivedArgList *DAL = new DerivedArgList(Args);
bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
for (ArgList::const_iterator it = Args.begin(),
ie = Args.end(); it != ie; ++it) {
const Arg *A = *it;
// Unfortunately, we have to parse some forwarding options (-Xassembler,
// -Xlinker, -Xpreprocessor) because we either integrate their functionality
// (assembler and preprocessor), or bypass a previous driver ('collect2').
// Rewrite linker options, to replace --no-demangle with a custom internal
// option.
if ((A->getOption().matches(options::OPT_Wl_COMMA) ||
A->getOption().matches(options::OPT_Xlinker)) &&
A->containsValue("--no-demangle")) {
// Add the rewritten no-demangle argument.
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_Xlinker__no_demangle));
// Add the remaining values as Xlinker arguments.
for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
if (StringRef(A->getValue(i)) != "--no-demangle")
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker),
A->getValue(i));
continue;
}
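// For example, the rewrite above turns "-Wl,--no-demangle,-z,now" into the
// internal no-demangle flag followed by "-Xlinker -z -Xlinker now".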
// Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by
// some build systems. We don't try to be complete here because we don't
// care to encourage this usage model.
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
(A->getValue(0) == StringRef("-MD") ||
A->getValue(0) == StringRef("-MMD"))) {
// Rewrite to -MD/-MMD along with -MF.
if (A->getValue(0) == StringRef("-MD"))
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MD));
else
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MMD));
if (A->getNumValues() == 2)
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_MF),
A->getValue(1));
continue;
}
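// For example, "-Wp,-MD,foo.d" is rewritten to "-MD" plus "-MF foo.d".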
// Rewrite reserved library names.
if (A->getOption().matches(options::OPT_l)) {
StringRef Value = A->getValue();
// Rewrite unless -nostdlib is present.
if (!HasNostdlib && Value == "stdc++") {
DAL->AddFlagArg(A, Opts->getOption(
options::OPT_Z_reserved_lib_stdcxx));
continue;
}
// Rewrite unconditionally.
if (Value == "cc_kext") {
DAL->AddFlagArg(A, Opts->getOption(
options::OPT_Z_reserved_lib_cckext));
continue;
}
}
// Pick up inputs via the -- option.
if (A->getOption().matches(options::OPT__DASH_DASH)) {
A->claim();
for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
DAL->append(MakeInputArg(*DAL, Opts, A->getValue(i)));
continue;
}
DAL->append(*it);
}
// Add a default value of -mlinker-version=, if one was given and the user
// didn't specify one.
#if defined(HOST_LINK_VERSION)
if (!Args.hasArg(options::OPT_mlinker_version_EQ)) {
DAL->AddJoinedArg(0, Opts->getOption(options::OPT_mlinker_version_EQ),
HOST_LINK_VERSION);
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
}
#endif
return DAL;
}
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
// FIXME: Handle environment options which affect driver behavior, somewhere
// (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
if (char *env = ::getenv("COMPILER_PATH")) {
StringRef CompilerPath = env;
while (!CompilerPath.empty()) {
std::pair<StringRef, StringRef> Split
= CompilerPath.split(llvm::sys::EnvPathSeparator);
PrefixDirs.push_back(Split.first);
CompilerPath = Split.second;
}
}
// We look for the driver mode option early, because the mode can affect
// how other options are parsed.
ParseDriverMode(ArgList.slice(1));
// FIXME: What are we going to do with -V and -b?
// FIXME: This stuff needs to go into the Compilation, not the driver.
bool CCCPrintActions;
InputArgList *Args = ParseArgStrings(ArgList.slice(1));
// -no-canonical-prefixes is used very early in main.
Args->ClaimAllArgs(options::OPT_no_canonical_prefixes);
// Ignore -pipe.
Args->ClaimAllArgs(options::OPT_pipe);
// Extract -ccc args.
//
// FIXME: We need to figure out where this behavior should live. Most of it
// should be outside in the client; the parts that aren't should have proper
// options, either by introducing new ones or by overloading gcc ones like -V
// or -b.
CCCPrintActions = Args->hasArg(options::OPT_ccc_print_phases);
CCCPrintBindings = Args->hasArg(options::OPT_ccc_print_bindings);
if (const Arg *A = Args->getLastArg(options::OPT_ccc_gcc_name))
CCCGenericGCCName = A->getValue();
CCCUsePCH = Args->hasFlag(options::OPT_ccc_pch_is_pch,
options::OPT_ccc_pch_is_pth);
// FIXME: DefaultTargetTriple is used by the target-prefixed calls to as/ld
// and getToolChain is const.
if (IsCLMode()) {
// clang-cl targets Win32.
llvm::Triple T(DefaultTargetTriple);
T.setOSName(llvm::Triple::getOSTypeName(llvm::Triple::Win32));
DefaultTargetTriple = T.str();
}
if (const Arg *A = Args->getLastArg(options::OPT_target))
DefaultTargetTriple = A->getValue();
if (const Arg *A = Args->getLastArg(options::OPT_ccc_install_dir))
Dir = InstalledDir = A->getValue();
for (arg_iterator it = Args->filtered_begin(options::OPT_B),
ie = Args->filtered_end(); it != ie; ++it) {
const Arg *A = *it;
A->claim();
PrefixDirs.push_back(A->getValue(0));
}
if (const Arg *A = Args->getLastArg(options::OPT__sysroot_EQ))
SysRoot = A->getValue();
if (const Arg *A = Args->getLastArg(options::OPT__dyld_prefix_EQ))
DyldPrefix = A->getValue();
if (Args->hasArg(options::OPT_nostdlib))
UseStdLib = false;
if (const Arg *A = Args->getLastArg(options::OPT_resource_dir))
ResourceDir = A->getValue();
// Perform the default argument translations.
DerivedArgList *TranslatedArgs = TranslateInputArgs(*Args);
// Owned by the host.
const ToolChain &TC = getToolChain(*Args);
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, Args, TranslatedArgs);
if (!HandleImmediateArgs(*C))
return C;
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
// Construct the list of abstract actions to perform for this compilation. On
// Darwin target OSes this uses the driver-driver and universal actions.
if (TC.getTriple().isOSDarwin())
BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(),
Inputs, C->getActions());
else
BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
C->getActions());
if (CCCPrintActions) {
PrintActions(*C);
return C;
}
BuildJobs(*C);
return C;
}
// When clang crashes, produce diagnostic information including the fully
// preprocessed source file(s). Request that the developer attach the
// diagnostic information to a bug report.
void Driver::generateCompilationDiagnostics(Compilation &C,
const Command *FailingCommand) {
if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
return;
// Don't try to generate diagnostics for link or dsymutil jobs.
if (FailingCommand && (FailingCommand->getCreator().isLinkJob() ||
FailingCommand->getCreator().isDsymutilJob()))
return;
// Print the version of the compiler.
PrintVersion(C, llvm::errs());
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "PLEASE submit a bug report to " BUG_REPORT_URL " and include the "
"crash backtrace, preprocessed source, and associated run script.";
// Suppress driver output and emit preprocessor output to temp file.
Mode = CPPMode;
CCGenDiagnostics = true;
C.getArgs().AddFlagArg(0, Opts->getOption(options::OPT_frewrite_includes));
// Save the original job command(s).
std::string Cmd;
llvm::raw_string_ostream OS(Cmd);
if (FailingCommand)
FailingCommand->Print(OS, "\n", /*Quote*/ false, /*CrashReport*/ true);
else
// Crash triggered by FORCE_CLANG_DIAGNOSTICS_CRASH, which doesn't have an
// associated FailingCommand, so just pass all jobs.
C.getJobs().Print(OS, "\n", /*Quote*/ false, /*CrashReport*/ true);
OS.flush();
// Keep track of whether we produce any errors while trying to produce
// preprocessed sources.
DiagnosticErrorTrap Trap(Diags);
// Suppress tool output.
C.initCompilationForDiagnostics();
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
bool IgnoreInput = false;
// Ignore input from stdin or any inputs that cannot be preprocessed.
if (!strcmp(it->second->getValue(), "-")) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - ignoring input from stdin"
".";
IgnoreInput = true;
} else if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
IgnoreInput = true;
}
if (IgnoreInput) {
it = Inputs.erase(it);
ie = Inputs.end();
} else {
++it;
}
}
if (Inputs.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - no preprocessable inputs.";
return;
}
// Don't attempt to generate preprocessed files if multiple -arch options are
// used, unless they're all duplicates.
llvm::StringSet<> ArchNames;
for (ArgList::const_iterator it = C.getArgs().begin(), ie = C.getArgs().end();
it != ie; ++it) {
Arg *A = *it;
if (A->getOption().matches(options::OPT_arch)) {
StringRef ArchName = A->getValue();
ArchNames.insert(ArchName);
}
}
if (ArchNames.size() > 1) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - cannot generate "
"preprocessed source with multiple -arch options.";
return;
}
// Construct the list of abstract actions to perform for this compilation. On
// Darwin OSes this uses the driver-driver and builds universal actions.
const ToolChain &TC = C.getDefaultToolChain();
if (TC.getTriple().isOSDarwin())
BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions());
else
BuildActions(TC, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
// If there were errors building the compilation, quit now.
if (Trap.hasErrorOccurred()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
// Generate preprocessed output.
SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
C.ExecuteJob(C.getJobs(), FailingCommands);
// If the command succeeded, we are done.
if (FailingCommands.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n********************\n\n"
"PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n"
"Preprocessed source(s) and associated run script(s) are located at:";
ArgStringList Files = C.getTempFiles();
for (ArgStringList::const_iterator it = Files.begin(), ie = Files.end();
it != ie; ++it) {
Diag(clang::diag::note_drv_command_failed_diag_msg) << *it;
std::string Err;
std::string Script = StringRef(*it).rsplit('.').first;
Script += ".sh";
llvm::raw_fd_ostream ScriptOS(
Script.c_str(), Err, llvm::sys::fs::F_Excl | llvm::sys::fs::F_Binary);
if (!Err.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating run script: " + Script + " " + Err;
} else {
// Append the new filename with correct preprocessed suffix.
size_t I, E;
I = Cmd.find("-main-file-name ");
assert(I != std::string::npos && "Expected to find -main-file-name");
I += 16;
E = Cmd.find(" ", I);
assert(E != std::string::npos && "-main-file-name missing argument?");
StringRef OldFilename = StringRef(Cmd).slice(I, E);
StringRef NewFilename = llvm::sys::path::filename(*it);
I = StringRef(Cmd).rfind(OldFilename);
E = I + OldFilename.size();
I = Cmd.rfind(" ", I) + 1;
Cmd.replace(I, E - I, NewFilename.data(), NewFilename.size());
ScriptOS << Cmd;
Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
}
}
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n\n********************";
} else {
// Failure, remove preprocessed files.
if (!C.getArgs().hasArg(options::OPT_save_temps))
C.CleanupFileList(C.getTempFiles(), true);
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
}
}
int Driver::ExecuteCompilation(const Compilation &C,
SmallVectorImpl< std::pair<int, const Command *> > &FailingCommands) const {
// Just print if -### was present.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
C.getJobs().Print(llvm::errs(), "\n", true);
return 0;
}
// If there were errors building the compilation, quit now.
if (Diags.hasErrorOccurred())
return 1;
C.ExecuteJob(C.getJobs(), FailingCommands);
// Remove temp files.
C.CleanupFileList(C.getTempFiles());
// If the command succeeded, we are done.
if (FailingCommands.empty())
return 0;
// Otherwise, remove result files and print extra information about abnormal
// failures.
for (SmallVectorImpl< std::pair<int, const Command *> >::iterator it =
FailingCommands.begin(), ie = FailingCommands.end(); it != ie; ++it) {
int Res = it->first;
const Command *FailingCommand = it->second;
// Remove result files if we're not saving temps.
if (!C.getArgs().hasArg(options::OPT_save_temps)) {
const JobAction *JA = cast<JobAction>(&FailingCommand->getSource());
C.CleanupFileMap(C.getResultFiles(), JA, true);
// Failure result files are valid unless we crashed.
if (Res < 0)
C.CleanupFileMap(C.getFailureResultFiles(), JA, true);
}
// Print extra information about abnormal failures, if possible.
//
// This is ad-hoc, but we don't want to be excessively noisy. If the result
// status was 1, assume the command failed normally. In particular, if it
// was the compiler then assume it gave a reasonable error code. Failures
// in other tools are less common, and they generally have worse
// diagnostics, so always print the diagnostic there.
const Tool &FailingTool = FailingCommand->getCreator();
if (!FailingCommand->getCreator().hasGoodDiagnostics() || Res != 1) {
// FIXME: See FIXME above regarding result code interpretation.
if (Res < 0)
Diag(clang::diag::err_drv_command_signalled)
<< FailingTool.getShortName();
else
Diag(clang::diag::err_drv_command_failed)
<< FailingTool.getShortName() << Res;
}
}
return 0;
}
void Driver::PrintHelp(bool ShowHidden) const {
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
llvm::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks();
ExcludedFlagsBitmask |= options::NoDriverOption;
if (!ShowHidden)
ExcludedFlagsBitmask |= HelpHidden;
getOpts().PrintHelp(llvm::outs(), Name.c_str(), DriverTitle.c_str(),
IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
OS << getClangFullVersion() << '\n';
const ToolChain &TC = C.getDefaultToolChain();
OS << "Target: " << TC.getTripleString() << '\n';
// Print the threading model.
//
// FIXME: Implement correctly.
OS << "Thread model: " << "posix" << '\n';
}
/// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
/// option.
static void PrintDiagnosticCategories(raw_ostream &OS) {
// Skip the empty category.
for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories();
i != max; ++i)
OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
}
bool Driver::HandleImmediateArgs(const Compilation &C) {
// The order these options are handled in gcc is all over the place, but we
// don't expect inconsistencies w.r.t. that to matter in practice.
if (C.getArgs().hasArg(options::OPT_dumpmachine)) {
llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_dumpversion)) {
// Since -dumpversion is only implemented for pedantic GCC compatibility, we
// return an answer which matches our definition of __VERSION__.
//
// If we want to return a more correct answer some day, then we should
// introduce a non-pedantically GCC compatible mode to Clang in which we
// provide sensible definitions for -dumpversion, __VERSION__, etc.
llvm::outs() << "4.2.1\n";
return false;
}
if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) {
PrintDiagnosticCategories(llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_help) ||
C.getArgs().hasArg(options::OPT__help_hidden)) {
PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden));
return false;
}
if (C.getArgs().hasArg(options::OPT__version)) {
// Follow gcc behavior and use stdout for --version and stderr for -v.
PrintVersion(C, llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_v) ||
C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
PrintVersion(C, llvm::errs());
SuppressMissingInputWarning = true;
}
const ToolChain &TC = C.getDefaultToolChain();
if (C.getArgs().hasArg(options::OPT_v))
TC.printVerboseInfo(llvm::errs());
if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
llvm::outs() << "programs: =";
for (ToolChain::path_list::const_iterator it = TC.getProgramPaths().begin(),
ie = TC.getProgramPaths().end(); it != ie; ++it) {
if (it != TC.getProgramPaths().begin())
llvm::outs() << ':';
llvm::outs() << *it;
}
llvm::outs() << "\n";
llvm::outs() << "libraries: =" << ResourceDir;
StringRef sysroot = C.getSysRoot();
for (ToolChain::path_list::const_iterator it = TC.getFilePaths().begin(),
ie = TC.getFilePaths().end(); it != ie; ++it) {
llvm::outs() << ':';
const char *path = it->c_str();
if (path[0] == '=')
llvm::outs() << sysroot << path + 1;
else
llvm::outs() << path;
}
llvm::outs() << "\n";
return false;
}
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) {
llvm::outs() << GetFilePath(A->getValue(), TC) << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
llvm::outs() << GetProgramPath(A->getValue(), TC) << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
llvm::outs() << GetFilePath("libgcc.a", TC) << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
// FIXME: We need tool chain support for this.
llvm::outs() << ".;\n";
switch (C.getDefaultToolChain().getTriple().getArch()) {
default:
break;
case llvm::Triple::x86_64:
llvm::outs() << "x86_64;@m64" << "\n";
break;
case llvm::Triple::ppc64:
llvm::outs() << "ppc64;@m64" << "\n";
break;
case llvm::Triple::ppc64le:
llvm::outs() << "ppc64le;@m64" << "\n";
break;
}
return false;
}
// FIXME: What is the difference between print-multi-directory and
// print-multi-os-directory?
if (C.getArgs().hasArg(options::OPT_print_multi_directory) ||
C.getArgs().hasArg(options::OPT_print_multi_os_directory)) {
switch (C.getDefaultToolChain().getTriple().getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::ppc:
llvm::outs() << "." << "\n";
break;
case llvm::Triple::x86_64:
llvm::outs() << "." << "\n";
break;
case llvm::Triple::ppc64:
llvm::outs() << "ppc64" << "\n";
break;
case llvm::Triple::ppc64le:
llvm::outs() << "ppc64le" << "\n";
break;
}
return false;
}
return true;
}
static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action*, unsigned> &Ids) {
if (Ids.count(A))
return Ids[A];
std::string str;
llvm::raw_string_ostream os(str);
os << Action::getClassName(A->getKind()) << ", ";
if (InputAction *IA = dyn_cast<InputAction>(A)) {
os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"'
<< ", {" << PrintActions1(C, *BIA->begin(), Ids) << "}";
} else {
os << "{";
for (Action::iterator it = A->begin(), ie = A->end(); it != ie;) {
os << PrintActions1(C, *it, Ids);
++it;
if (it != ie)
os << ", ";
}
os << "}";
}
unsigned Id = Ids.size();
Ids[A] = Id;
llvm::errs() << Id << ": " << os.str() << ", "
<< types::getTypeName(A->getType()) << "\n";
return Id;
}
void Driver::PrintActions(const Compilation &C) const {
std::map<Action*, unsigned> Ids;
for (ActionList::const_iterator it = C.getActions().begin(),
ie = C.getActions().end(); it != ie; ++it)
PrintActions1(C, *it, Ids);
}
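// With -ccc-print-phases, "clang -c foo.c" prints output along these lines
// (ids and types may vary by input and options):
//   0: input, "foo.c", c
//   1: preprocessor, {0}, cpp-output
//   2: compiler, {1}, assembler
//   3: assembler, {2}, object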
/// \brief Check whether the given input tree contains any compilation or
/// assembly actions.
static bool ContainsCompileOrAssembleAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<AssembleJobAction>(A))
return true;
for (Action::const_iterator it = A->begin(), ie = A->end(); it != ie; ++it)
if (ContainsCompileOrAssembleAction(*it))
return true;
return false;
}
void Driver::BuildUniversalActions(const ToolChain &TC,
DerivedArgList &Args,
const InputList &BAInputs,
ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
// Collect the list of architectures. Duplicates are allowed, but should only
// be handled once (in the order seen).
llvm::StringSet<> ArchNames;
SmallVector<const char *, 4> Archs;
for (ArgList::const_iterator it = Args.begin(), ie = Args.end();
it != ie; ++it) {
Arg *A = *it;
if (A->getOption().matches(options::OPT_arch)) {
// Validate the option here; we don't save the type here because its
// particular spelling may participate in other driver choices.
llvm::Triple::ArchType Arch =
tools::darwin::getArchTypeForDarwinArchName(A->getValue());
if (Arch == llvm::Triple::UnknownArch) {
Diag(clang::diag::err_drv_invalid_arch_name)
<< A->getAsString(Args);
continue;
}
A->claim();
if (ArchNames.insert(A->getValue()))
Archs.push_back(A->getValue());
}
}
// When there is no explicit arch for this platform, make sure we still bind
// the architecture (to the default) so that -Xarch_ is handled correctly.
if (!Archs.size())
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
BuildActions(TC, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
for (unsigned i = 0, e = SingleActions.size(); i != e; ++i) {
Action *Act = SingleActions[i];
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
// changing the output names to include the arch, which would also fix
// -save-temps. Compatibility wins for now.
if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
<< types::getTypeName(Act->getType());
ActionList Inputs;
for (unsigned i = 0, e = Archs.size(); i != e; ++i) {
Inputs.push_back(new BindArchAction(Act, Archs[i]));
if (i != 0)
Inputs.back()->setOwnsInputs(false);
}
// Lipo if necessary; we do it this way because we need to set the arch flag
// so that -Xarch_ gets overwritten.
if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing)
Actions.append(Inputs.begin(), Inputs.end());
else
Actions.push_back(new LipoJobAction(Inputs, Act->getType()));
// Handle debug info queries.
Arg *A = Args.getLastArg(options::OPT_g_Group);
if (A && !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_gstabs) &&
ContainsCompileOrAssembleAction(Actions.back())) {
// Add a 'dsymutil' step if necessary, when debug info is enabled and we
// have a compile input. We need to run 'dsymutil' ourselves in such cases
// because the debug info will refer to a temporary object file which
// will be removed at the end of the compilation process.
if (Act->getType() == types::TY_Image) {
ActionList Inputs;
Inputs.push_back(Actions.back());
Actions.pop_back();
Actions.push_back(new DsymutilJobAction(Inputs, types::TY_dSYM));
}
// Verify the output (debug information only) if we passed '-verify'.
if (Args.hasArg(options::OPT_verify)) {
ActionList VerifyInputs;
VerifyInputs.push_back(Actions.back());
Actions.pop_back();
Actions.push_back(new VerifyJobAction(VerifyInputs,
types::TY_Nothing));
}
}
}
}
/// \brief Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
StringRef Value) {
if (!D.getCheckInputsExist())
return true;
// stdin always exists.
if (Value == "-")
return true;
SmallString<64> Path(Value);
if (Arg *WorkDir = Args.getLastArg(options::OPT_working_directory)) {
if (!llvm::sys::path::is_absolute(Path.str())) {
SmallString<64> Directory(WorkDir->getValue());
llvm::sys::path::append(Directory, Value);
Path.assign(Directory);
}
}
if (llvm::sys::fs::exists(Twine(Path)))
return true;
D.Diag(clang::diag::err_drv_no_such_file) << Path.str();
return false;
}
// Construct the list of inputs and their types.
void Driver::BuildInputs(const ToolChain &TC, const DerivedArgList &Args,
InputList &Inputs) const {
// Track the current user specified (-x) input. We also explicitly track the
// argument used to set the type; we only want to claim the type when we
// actually use it, so we warn about unused -x arguments.
types::ID InputType = types::TY_Nothing;
Arg *InputTypeArg = 0;
// The last /TC or /TP option sets the input type to C or C++ globally.
if (Arg *TCTP = Args.getLastArg(options::OPT__SLASH_TC,
options::OPT__SLASH_TP)) {
InputTypeArg = TCTP;
InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
? types::TY_C : types::TY_CXX;
arg_iterator it = Args.filtered_begin(options::OPT__SLASH_TC,
options::OPT__SLASH_TP);
const arg_iterator ie = Args.filtered_end();
Arg *Previous = *it++;
bool ShowNote = false;
while (it != ie) {
Diag(clang::diag::warn_drv_overriding_flag_option)
<< Previous->getSpelling() << (*it)->getSpelling();
Previous = *it++;
ShowNote = true;
}
if (ShowNote)
Diag(clang::diag::note_drv_t_option_is_global);
// No driver mode exposes -x and /TC or /TP; we don't support mixing them.
assert(!Args.hasArg(options::OPT_x) && "-x and /TC or /TP is not allowed");
}
for (ArgList::const_iterator it = Args.begin(), ie = Args.end();
it != ie; ++it) {
Arg *A = *it;
if (A->getOption().getKind() == Option::InputClass) {
const char *Value = A->getValue();
types::ID Ty = types::TY_INVALID;
// Infer the input type if necessary.
if (InputType == types::TY_Nothing) {
// If there was an explicit arg for this, claim it.
if (InputTypeArg)
InputTypeArg->claim();
// stdin must be handled specially.
if (memcmp(Value, "-", 2) == 0) {
// If running with -E, treat as a C input (this changes the builtin
// macros, for example). This may be overridden by -ObjC below.
//
// Otherwise emit an error but still use a valid type to avoid
// spurious errors (e.g., no inputs).
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
Diag(clang::diag::err_drv_unknown_stdin_type);
Ty = types::TY_C;
} else {
// Otherwise, look up by extension. The fallback is C if invoked as a C
// preprocessor, or Object otherwise.
// We use a host hook here because Darwin at least has its own
// idea of what .s is.
if (const char *Ext = strrchr(Value, '.'))
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
if (CCCIsCPP())
Ty = types::TY_C;
else
Ty = types::TY_Object;
}
// If the driver is invoked as C++ compiler (like clang++ or c++) it
// should autodetect some input files as C++ for g++ compatibility.
if (CCCIsCXX()) {
types::ID OldTy = Ty;
Ty = types::lookupCXXTypeForCType(Ty);
if (Ty != OldTy)
Diag(clang::diag::warn_drv_treating_input_as_cxx)
<< getTypeName(OldTy) << getTypeName(Ty);
}
}
// -ObjC and -ObjC++ override the default language, but only for "source
// files". We just treat everything that isn't a linker input as a
// source file.
//
// FIXME: Clean this up if we move the phase sequence into the type.
if (Ty != types::TY_Object) {
if (Args.hasArg(options::OPT_ObjC))
Ty = types::TY_ObjC;
else if (Args.hasArg(options::OPT_ObjCXX))
Ty = types::TY_ObjCXX;
}
} else {
assert(InputTypeArg && "InputType set w/o InputTypeArg");
InputTypeArg->claim();
Ty = InputType;
}
if (DiagnoseInputExistence(*this, Args, Value))
Inputs.push_back(std::make_pair(Ty, A));
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(*this, Args, Value)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
}
A->claim();
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(*this, Args, Value)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
}
A->claim();
} else if (A->getOption().hasFlag(options::LinkerInput)) {
// Just treat as object type, we could make a special type for this if
// necessary.
Inputs.push_back(std::make_pair(types::TY_Object, A));
} else if (A->getOption().matches(options::OPT_x)) {
InputTypeArg = A;
InputType = types::lookupTypeForTypeSpecifier(A->getValue());
A->claim();
// Follow gcc behavior and treat invalid -x options as linker input.
// It's not clear why we shouldn't just revert to unknown, but this
// isn't very important; we might as well be bug-compatible.
if (!InputType) {
Diag(clang::diag::err_drv_unknown_language) << A->getValue();
InputType = types::TY_Object;
}
}
}
if (CCCIsCPP() && Inputs.empty()) {
// If called as standalone preprocessor, stdin is processed
// if no other input is present.
Arg *A = MakeInputArg(Args, Opts, "-");
Inputs.push_back(std::make_pair(types::TY_C, A));
}
}
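// Illustrating the type inference above: "clang -x assembler foo.c" forces
// foo.c to be treated as assembler input, while "clang++ foo.c" keeps the
// file but warns that it is being treated as C++ input.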
void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
const InputList &Inputs, ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
Diag(clang::diag::err_drv_no_input_files);
return;
}
Arg *FinalPhaseArg;
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link && Args.hasArg(options::OPT_emit_llvm)) {
Diag(clang::diag::err_drv_emit_llvm_link);
}
// Reject -Z* at the top level, these options should never have been exposed
// by gcc.
if (Arg *A = Args.getLastArg(options::OPT_Z_Joined))
Diag(clang::diag::err_drv_use_of_Z_option) << A->getAsString(Args);
// Diagnose misuse of /Fo.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) {
StringRef V = A->getValue();
if (V.empty()) {
// It has to have a value.
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
Args.eraseArg(options::OPT__SLASH_Fo);
} else if (Inputs.size() > 1 && !llvm::sys::path::is_separator(V.back())) {
// Check whether /Fo tries to name an output file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fo);
}
}
// Diagnose misuse of /Fa.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !llvm::sys::path::is_separator(V.back())) {
// Check whether /Fa tries to name an asm file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fa);
}
}
// Diagnose misuse of /Fe.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fe)) {
if (A->getValue()[0] == '\0') {
// It has to have a value.
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
Args.eraseArg(options::OPT__SLASH_Fe);
}
}
// Construct the actions to perform.
ActionList LinkerInputs;
ActionList SplitInputs;
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PL;
for (unsigned i = 0, e = Inputs.size(); i != e; ++i) {
types::ID InputType = Inputs[i].first;
const Arg *InputArg = Inputs[i].second;
PL.clear();
types::getCompilationPhases(InputType, PL);
// If the first step comes after the final phase we are doing as part of
// this compilation, warn the user about it.
phases::ID InitialPhase = PL[0];
if (InitialPhase > FinalPhase) {
// Claim here to avoid the more general unused warning.
InputArg->claim();
// Suppress all unused style warnings with -Qunused-arguments
if (Args.hasArg(options::OPT_Qunused_arguments))
continue;
// Special case when the final phase is determined by the binary name,
// rather than by a command-line argument with a corresponding Arg.
if (CCCIsCPP())
Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
<< InputArg->getAsString(Args)
<< getPhaseName(InitialPhase);
// Special-case the '-E' warning on a previously preprocessed file so it
// makes more sense.
else if (InitialPhase == phases::Compile &&
FinalPhase == phases::Preprocess &&
getPreprocessedType(InputType) == types::TY_INVALID)
Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
<< InputArg->getAsString(Args)
<< !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
else
Diag(clang::diag::warn_drv_input_file_unused)
<< InputArg->getAsString(Args)
<< getPhaseName(InitialPhase)
<< !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
continue;
}
// Build the pipeline for this file.
OwningPtr<Action> Current(new InputAction(*InputArg, InputType));
for (SmallVectorImpl<phases::ID>::iterator
i = PL.begin(), e = PL.end(); i != e; ++i) {
phases::ID Phase = *i;
// We are done if this step is past what the user requested.
if (Phase > FinalPhase)
break;
// Queue linker inputs.
if (Phase == phases::Link) {
assert((i + 1) == e && "linking must be final compilation step.");
LinkerInputs.push_back(Current.take());
break;
}
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
// encode this in the steps because the intermediate type depends on
// arguments. Just special case here.
if (Phase == phases::Assemble && Current->getType() != types::TY_PP_Asm)
continue;
// Otherwise construct the appropriate action.
Current.reset(ConstructPhaseAction(Args, Phase, Current.take()));
if (Current->getType() == types::TY_Nothing)
break;
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current.take());
}
// Add a link action if necessary.
if (!LinkerInputs.empty())
Actions.push_back(new LinkJobAction(LinkerInputs, types::TY_Image));
// If we are linking, claim any options which are obviously only used for
// compilation.
if (FinalPhase == phases::Link && PL.size() == 1) {
Args.ClaimAllArgs(options::OPT_CompileOnly_Group);
Args.ClaimAllArgs(options::OPT_cl_compile_Group);
}
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
}
Action *Driver::ConstructPhaseAction(const ArgList &Args, phases::ID Phase,
Action *Input) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Build the appropriate action.
switch (Phase) {
case phases::Link: llvm_unreachable("link action invalid here.");
case phases::Preprocess: {
types::ID OutputTy;
// -{M, MM} alter the output type.
if (Args.hasArg(options::OPT_M, options::OPT_MM)) {
OutputTy = types::TY_Dependencies;
} else {
OutputTy = Input->getType();
if (!Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false))
OutputTy = types::getPreprocessedType(OutputTy);
assert(OutputTy != types::TY_INVALID &&
"Cannot preprocess this input type!");
}
return new PreprocessJobAction(Input, OutputTy);
}
case phases::Precompile: {
types::ID OutputTy = types::TY_PCH;
if (Args.hasArg(options::OPT_fsyntax_only)) {
// Syntax checks should not emit a PCH file
OutputTy = types::TY_Nothing;
}
return new PrecompileJobAction(Input, OutputTy);
}
case phases::Compile: {
if (Args.hasArg(options::OPT_fsyntax_only)) {
return new CompileJobAction(Input, types::TY_Nothing);
} else if (Args.hasArg(options::OPT_rewrite_objc)) {
return new CompileJobAction(Input, types::TY_RewrittenObjC);
} else if (Args.hasArg(options::OPT_rewrite_legacy_objc)) {
return new CompileJobAction(Input, types::TY_RewrittenLegacyObjC);
} else if (Args.hasArg(options::OPT__analyze, options::OPT__analyze_auto)) {
return new AnalyzeJobAction(Input, types::TY_Plist);
} else if (Args.hasArg(options::OPT__migrate)) {
return new MigrateJobAction(Input, types::TY_Remap);
} else if (Args.hasArg(options::OPT_emit_ast)) {
return new CompileJobAction(Input, types::TY_AST);
} else if (Args.hasArg(options::OPT_module_file_info)) {
return new CompileJobAction(Input, types::TY_ModuleFile);
} else if (IsUsingLTO(Args)) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return new CompileJobAction(Input, Output);
} else if (Args.hasArg(options::OPT_emit_llvm)) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
return new CompileJobAction(Input, Output);
} else {
return new CompileJobAction(Input, types::TY_PP_Asm);
}
}
case phases::Assemble:
return new AssembleJobAction(Input, types::TY_Object);
}
llvm_unreachable("invalid phase in ConstructPhaseAction");
}
bool Driver::IsUsingLTO(const ArgList &Args) const {
if (Args.hasFlag(options::OPT_flto, options::OPT_fno_lto, false))
return true;
return false;
}
void Driver::BuildJobs(Compilation &C) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
// It is an error to provide a -o option if we are making multiple output
// files.
if (FinalOutput) {
unsigned NumOutputs = 0;
for (ActionList::const_iterator it = C.getActions().begin(),
ie = C.getActions().end(); it != ie; ++it)
if ((*it)->getType() != types::TY_Nothing)
++NumOutputs;
if (NumOutputs > 1) {
Diag(clang::diag::err_drv_output_argument_with_multiple_files);
FinalOutput = 0;
}
}
// Collect the list of architectures.
llvm::StringSet<> ArchNames;
if (C.getDefaultToolChain().getTriple().isOSDarwin()) {
for (ArgList::const_iterator it = C.getArgs().begin(), ie = C.getArgs().end();
it != ie; ++it) {
Arg *A = *it;
if (A->getOption().matches(options::OPT_arch))
ArchNames.insert(A->getValue());
}
}
for (ActionList::const_iterator it = C.getActions().begin(),
ie = C.getActions().end(); it != ie; ++it) {
Action *A = *it;
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
// doesn't fit in cleanly because we have to pass this information down.
//
// FIXME: This is a hack; find a cleaner way to integrate this into the
// process.
const char *LinkingOutput = 0;
if (isa<LipoJobAction>(A)) {
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else
LinkingOutput = DefaultImageName.c_str();
}
InputInfo II;
BuildJobsForAction(C, A, &C.getDefaultToolChain(),
/*BoundArch*/0,
/*AtTopLevel*/ true,
/*MultipleArchs*/ ArchNames.size() > 1,
/*LinkingOutput*/ LinkingOutput,
II);
}
// If the user passed -Qunused-arguments or there were errors, don't warn
// about any unused arguments.
if (Diags.hasErrorOccurred() ||
C.getArgs().hasArg(options::OPT_Qunused_arguments))
return;
// Claim -### here.
(void) C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
// Claim --driver-mode, it was handled earlier.
(void) C.getArgs().hasArg(options::OPT_driver_mode);
for (ArgList::const_iterator it = C.getArgs().begin(), ie = C.getArgs().end();
it != ie; ++it) {
Arg *A = *it;
// FIXME: It would be nice to be able to send the argument to the
// DiagnosticsEngine, so that extra values, position, and so on could be
// printed.
if (!A->isClaimed()) {
if (A->getOption().hasFlag(options::NoArgumentUnused))
continue;
// Suppress the warning automatically if this is just a flag, and it is an
// instance of an argument we already claimed.
const Option &Opt = A->getOption();
if (Opt.getKind() == Option::FlagClass) {
bool DuplicateClaimed = false;
for (arg_iterator it = C.getArgs().filtered_begin(&Opt),
ie = C.getArgs().filtered_end(); it != ie; ++it) {
if ((*it)->isClaimed()) {
DuplicateClaimed = true;
break;
}
}
if (DuplicateClaimed)
continue;
}
Diag(clang::diag::warn_drv_unused_argument)
<< A->getAsString(C.getArgs());
}
}
}
static const Tool *SelectToolForJob(Compilation &C, const ToolChain *TC,
const JobAction *JA,
const ActionList *&Inputs) {
const Tool *ToolForJob = 0;
// See if we should look for a compiler with an integrated assembler. We match
// bottom up, so what we are actually looking for is an assembler job with a
// compiler input.
if (TC->useIntegratedAs() &&
!C.getArgs().hasArg(options::OPT_save_temps) &&
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
!C.getArgs().hasArg(options::OPT__SLASH_Fa) &&
isa<AssembleJobAction>(JA) &&
Inputs->size() == 1 && isa<CompileJobAction>(*Inputs->begin())) {
const Tool *Compiler =
TC->SelectTool(cast<JobAction>(**Inputs->begin()));
if (!Compiler)
return NULL;
if (Compiler->hasIntegratedAssembler()) {
Inputs = &(*Inputs)[0]->getInputs();
ToolForJob = Compiler;
}
}
// Otherwise use the tool for the current job.
if (!ToolForJob)
ToolForJob = TC->SelectTool(*JA);
// See if we should use an integrated preprocessor. We do so when we have
// exactly one input, since this is the only use case we care about
// (irrelevant since we don't support combine yet).
if (Inputs->size() == 1 && isa<PreprocessJobAction>(*Inputs->begin()) &&
!C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) &&
!C.getArgs().hasArg(options::OPT_save_temps) &&
!C.getArgs().hasArg(options::OPT_rewrite_objc) &&
ToolForJob->hasIntegratedCPP())
Inputs = &(*Inputs)[0]->getInputs();
return ToolForJob;
}
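// Net effect (sketch): on a toolchain with an integrated assembler, the
// compile and assemble steps for one input are typically folded into a
// single clang job; -save-temps (or /FA, /Fa) keeps them separate.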
void Driver::BuildJobsForAction(Compilation &C,
const Action *A,
const ToolChain *TC,
const char *BoundArch,
bool AtTopLevel,
bool MultipleArchs,
const char *LinkingOutput,
InputInfo &Result) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
// FIXME: It would be nice to not claim this here; maybe the old scheme of
// just using Args was better?
const Arg &Input = IA->getInputArg();
Input.claim();
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
Result = InputInfo(Name, A->getType(), Name);
} else
Result = InputInfo(&Input, A->getType(), "");
return;
}
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
const ToolChain *TC;
const char *ArchName = BAA->getArchName();
if (ArchName)
TC = &getToolChain(C.getArgs(), ArchName);
else
TC = &C.getDefaultToolChain();
BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(),
AtTopLevel, MultipleArchs, LinkingOutput, Result);
return;
}
const ActionList *Inputs = &A->getInputs();
const JobAction *JA = cast<JobAction>(A);
const Tool *T = SelectToolForJob(C, TC, JA, Inputs);
if (!T)
return;
// Only use pipes when there is exactly one input.
InputInfoList InputInfos;
for (ActionList::const_iterator it = Inputs->begin(), ie = Inputs->end();
it != ie; ++it) {
// Treat dsymutil and verify sub-jobs as being at the top-level too, they
// shouldn't get temporary output names.
// FIXME: Clean this up.
bool SubJobAtTopLevel = false;
if (AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)))
SubJobAtTopLevel = true;
InputInfo II;
BuildJobsForAction(C, *it, TC, BoundArch, SubJobAtTopLevel, MultipleArchs,
LinkingOutput, II);
InputInfos.push_back(II);
}
// Always use the first input as the base input.
const char *BaseInput = InputInfos[0].getBaseInput();
// ... except dsymutil actions, which use their actual input as the base
// input.
if (JA->getType() == types::TY_dSYM)
BaseInput = InputInfos[0].getFilename();
// Determine the place to write output to, if any.
if (JA->getType() == types::TY_Nothing)
Result = InputInfo(A->getType(), BaseInput);
else
Result = InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
AtTopLevel, MultipleArchs),
A->getType(), BaseInput);
if (CCCPrintBindings && !CCGenDiagnostics) {
llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
<< " - \"" << T->getName() << "\", inputs: [";
for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
llvm::errs() << InputInfos[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
llvm::errs() << "], output: " << Result.getAsString() << "\n";
} else {
T->ConstructJob(C, *JA, Result, InputInfos,
C.getArgsForToolChain(TC, BoundArch), LinkingOutput);
}
}
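// A -ccc-print-bindings line emitted above looks roughly like:
//   # "x86_64-unknown-freebsd10.0" - "clang", inputs: ["foo.c"], output: "foo.o"
// where the triple, tool name, and paths depend on the configuration.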
/// \brief Create output filename based on ArgValue, which could either be a
/// full filename, filename without extension, or a directory. If ArgValue
/// does not provide a filename, then use BaseName, and use the extension
/// suitable for FileType.
static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
StringRef BaseName, types::ID FileType) {
SmallString<128> Filename = ArgValue;
if (ArgValue.empty()) {
// If the argument is empty, output to BaseName in the current dir.
Filename = BaseName;
} else if (llvm::sys::path::is_separator(Filename.back())) {
// If the argument is a directory, output to BaseName in that dir.
llvm::sys::path::append(Filename, BaseName);
}
if (!llvm::sys::path::has_extension(ArgValue)) {
// If the argument didn't provide an extension, then set it.
const char *Extension = types::getTypeTempSuffix(FileType, true);
if (FileType == types::TY_Image &&
Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) {
// The output file is a dll.
Extension = "dll";
}
llvm::sys::path::replace_extension(Filename, Extension);
}
return Args.MakeArgString(Filename.c_str());
}
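// Sketch of the naming rules above, with BaseName "foo.c" and FileType
// types::TY_Object: "" yields "foo.obj", "out/" yields "out/foo.obj", and
// "bar.o" is kept as-is because it already has an extension.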
const char *Driver::GetNamedOutputPath(Compilation &C,
const JobAction &JA,
const char *BaseInput,
const char *BoundArch,
bool AtTopLevel,
bool MultipleArchs) const {
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
if (AtTopLevel && !isa<DsymutilJobAction>(JA) &&
!isa<VerifyJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
return C.addResultFile(FinalOutput->getValue(), &JA);
}
// Default to writing to stdout?
if (AtTopLevel && !CCGenDiagnostics &&
(isa<PreprocessJobAction>(JA) || JA.getType() == types::TY_ModuleFile))
return "-";
// Is this the assembly listing for /FA?
if (JA.getType() == types::TY_PP_Asm &&
(C.getArgs().hasArg(options::OPT__SLASH_FA) ||
C.getArgs().hasArg(options::OPT__SLASH_Fa))) {
// Use /Fa and the input filename to determine the asm file name.
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
return C.addResultFile(MakeCLOutputFilename(C.getArgs(), FaValue, BaseName,
JA.getType()), &JA);
}
// Output to a temporary file?
if ((!AtTopLevel && !C.getArgs().hasArg(options::OPT_save_temps) &&
!C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
CCGenDiagnostics) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName =
GetTemporaryPath(Split.first,
types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
}
SmallString<128> BasePath(BaseInput);
StringRef BaseName;
// Dsymutil actions should use the full path.
if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
BaseName = BasePath;
else
BaseName = llvm::sys::path::filename(BasePath);
// Determine what the derived output name should be.
const char *NamedOutput;
if (JA.getType() == types::TY_Object &&
C.getArgs().hasArg(options::OPT__SLASH_Fo)) {
// The /Fo flag decides the object filename.
StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_Fo)->getValue();
NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName,
types::TY_Object);
} else if (JA.getType() == types::TY_Image &&
C.getArgs().hasArg(options::OPT__SLASH_Fe)) {
// The /Fe flag names the linked file.
StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_Fe)->getValue();
NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName,
types::TY_Image);
} else if (JA.getType() == types::TY_Image) {
if (IsCLMode()) {
// clang-cl uses BaseName for the executable name.
NamedOutput = MakeCLOutputFilename(C.getArgs(), "", BaseName,
types::TY_Image);
} else if (MultipleArchs && BoundArch) {
SmallString<128> Output(DefaultImageName.c_str());
Output += "-";
Output.append(BoundArch);
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
} else
NamedOutput = DefaultImageName.c_str();
} else {
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
assert(Suffix && "All types used for output should have a suffix.");
std::string::size_type End = std::string::npos;
if (!types::appendSuffixForType(JA.getType()))
End = BaseName.rfind('.');
SmallString<128> Suffixed(BaseName.substr(0, End));
if (MultipleArchs && BoundArch) {
Suffixed += "-";
Suffixed.append(BoundArch);
}
Suffixed += '.';
Suffixed += Suffix;
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
}
// If we're saving temps and the temp file conflicts with the input file,
// then avoid overwriting input file.
if (!AtTopLevel && C.getArgs().hasArg(options::OPT_save_temps) &&
NamedOutput == BaseName) {
bool SameFile = false;
SmallString<256> Result;
llvm::sys::fs::current_path(Result);
llvm::sys::path::append(Result, BaseName);
llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
// Must share the same path to conflict.
if (SameFile) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName =
GetTemporaryPath(Split.first,
types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
}
}
// As an annoying special case, PCH generation doesn't strip the pathname.
if (JA.getType() == types::TY_PCH) {
llvm::sys::path::remove_filename(BasePath);
if (BasePath.empty())
BasePath = NamedOutput;
else
llvm::sys::path::append(BasePath, NamedOutput);
return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA);
} else {
return C.addResultFile(NamedOutput, &JA);
}
}
std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const {
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for file paths.
for (Driver::prefix_list::const_iterator it = PrefixDirs.begin(),
ie = PrefixDirs.end(); it != ie; ++it) {
std::string Dir(*it);
if (Dir.empty())
continue;
if (Dir[0] == '=')
Dir = SysRoot + Dir.substr(1);
SmallString<128> P(Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
}
SmallString<128> P(ResourceDir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
const ToolChain::path_list &List = TC.getFilePaths();
for (ToolChain::path_list::const_iterator
it = List.begin(), ie = List.end(); it != ie; ++it) {
std::string Dir(*it);
if (Dir.empty())
continue;
if (Dir[0] == '=')
Dir = SysRoot + Dir.substr(1);
SmallString<128> P(Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
}
return Name;
}
std::string Driver::GetProgramPath(const char *Name,
const ToolChain &TC) const {
// FIXME: Needs a better variable than DefaultTargetTriple
std::string TargetSpecificExecutable(DefaultTargetTriple + "-" + Name);
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for program paths.
for (Driver::prefix_list::const_iterator it = PrefixDirs.begin(),
ie = PrefixDirs.end(); it != ie; ++it) {
if (llvm::sys::fs::is_directory(*it)) {
SmallString<128> P(*it);
llvm::sys::path::append(P, TargetSpecificExecutable);
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
llvm::sys::path::remove_filename(P);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
} else {
SmallString<128> P(*it + Name);
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
}
}
const ToolChain::path_list &List = TC.getProgramPaths();
for (ToolChain::path_list::const_iterator
it = List.begin(), ie = List.end(); it != ie; ++it) {
SmallString<128> P(*it);
llvm::sys::path::append(P, TargetSpecificExecutable);
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
llvm::sys::path::remove_filename(P);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
}
// If all else failed, search the path.
std::string P(llvm::sys::FindProgramByName(TargetSpecificExecutable));
if (!P.empty())
return P;
P = llvm::sys::FindProgramByName(Name);
if (!P.empty())
return P;
return Name;
}
std::string Driver::GetTemporaryPath(StringRef Prefix, const char *Suffix)
const {
SmallString<128> Path;
llvm::error_code EC =
llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return Path.str();
}
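// For example, GetTemporaryPath("foo", "o") might return something like
// "/tmp/foo-1a2b3c.o"; the directory and the unique part depend on the
// platform's temporary-file support.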
/// \brief Compute target triple from args.
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
const ArgList &Args,
StringRef DarwinArchName) {
// FIXME: Already done in Compilation *Driver::BuildCompilation
if (const Arg *A = Args.getLastArg(options::OPT_target))
DefaultTargetTriple = A->getValue();
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
// Handle Darwin-specific options available here.
if (Target.isOSDarwin()) {
// If an explicit Darwin arch name is given, that trumps all.
if (!DarwinArchName.empty()) {
Target.setArch(
tools::darwin::getArchTypeForDarwinArchName(DarwinArchName));
return Target;
}
// Handle the Darwin '-arch' flag.
if (Arg *A = Args.getLastArg(options::OPT_arch)) {
llvm::Triple::ArchType DarwinArch
= tools::darwin::getArchTypeForDarwinArchName(A->getValue());
if (DarwinArch != llvm::Triple::UnknownArch)
Target.setArch(DarwinArch);
}
}
// Handle pseudo-target flags '-EL' and '-EB'.
if (Arg *A = Args.getLastArg(options::OPT_EL, options::OPT_EB)) {
if (A->getOption().matches(options::OPT_EL)) {
if (Target.getArch() == llvm::Triple::mips)
Target.setArch(llvm::Triple::mipsel);
else if (Target.getArch() == llvm::Triple::mips64)
Target.setArch(llvm::Triple::mips64el);
} else {
if (Target.getArch() == llvm::Triple::mipsel)
Target.setArch(llvm::Triple::mips);
else if (Target.getArch() == llvm::Triple::mips64el)
Target.setArch(llvm::Triple::mips64);
}
}
// Skip further flag support on OSes which don't support '-m32' or '-m64'.
if (Target.getArchName() == "tce" ||
Target.getOS() == llvm::Triple::AuroraUX ||
Target.getOS() == llvm::Triple::Minix)
return Target;
// Handle pseudo-target flags '-m32' and '-m64'.
// FIXME: Should this information be in llvm::Triple?
if (Arg *A = Args.getLastArg(options::OPT_m32, options::OPT_m64)) {
if (A->getOption().matches(options::OPT_m32)) {
if (Target.getArch() == llvm::Triple::x86_64)
Target.setArch(llvm::Triple::x86);
if (Target.getArch() == llvm::Triple::ppc64)
Target.setArch(llvm::Triple::ppc);
} else {
if (Target.getArch() == llvm::Triple::x86)
Target.setArch(llvm::Triple::x86_64);
if (Target.getArch() == llvm::Triple::ppc)
Target.setArch(llvm::Triple::ppc64);
}
}
return Target;
}
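// For example, starting from a default triple of "x86_64-unknown-freebsd10.0",
// passing -m32 yields "i386-unknown-freebsd10.0" (llvm::Triple renders the
// x86 arch as "i386").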
const ToolChain &Driver::getToolChain(const ArgList &Args,
StringRef DarwinArchName) const {
llvm::Triple Target = computeTargetTriple(DefaultTargetTriple, Args,
DarwinArchName);
ToolChain *&TC = ToolChains[Target.str()];
if (!TC) {
switch (Target.getOS()) {
case llvm::Triple::AuroraUX:
TC = new toolchains::AuroraUX(*this, Target, Args);
break;
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
case llvm::Triple::IOS:
if (Target.getArch() == llvm::Triple::x86 ||
Target.getArch() == llvm::Triple::x86_64 ||
Target.getArch() == llvm::Triple::arm ||
Target.getArch() == llvm::Triple::thumb)
TC = new toolchains::DarwinClang(*this, Target, Args);
else
TC = new toolchains::Darwin_Generic_GCC(*this, Target, Args);
break;
case llvm::Triple::DragonFly:
TC = new toolchains::DragonFly(*this, Target, Args);
break;
case llvm::Triple::OpenBSD:
TC = new toolchains::OpenBSD(*this, Target, Args);
break;
case llvm::Triple::Bitrig:
TC = new toolchains::Bitrig(*this, Target, Args);
break;
case llvm::Triple::NetBSD:
TC = new toolchains::NetBSD(*this, Target, Args);
break;
case llvm::Triple::FreeBSD:
TC = new toolchains::FreeBSD(*this, Target, Args);
break;
case llvm::Triple::Minix:
TC = new toolchains::Minix(*this, Target, Args);
break;
case llvm::Triple::Linux:
if (Target.getArch() == llvm::Triple::hexagon)
TC = new toolchains::Hexagon_TC(*this, Target, Args);
else
TC = new toolchains::Linux(*this, Target, Args);
break;
case llvm::Triple::Solaris:
TC = new toolchains::Solaris(*this, Target, Args);
break;
case llvm::Triple::Win32:
TC = new toolchains::Windows(*this, Target, Args);
break;
case llvm::Triple::MinGW32:
// FIXME: We need a MinGW toolchain. Fallthrough for now.
default:
// TCE is an OSless target
if (Target.getArchName() == "tce") {
TC = new toolchains::TCEToolChain(*this, Target, Args);
break;
}
// If Hexagon is configured as an OSless target
if (Target.getArch() == llvm::Triple::hexagon) {
TC = new toolchains::Hexagon_TC(*this, Target, Args);
break;
}
if (Target.getArch() == llvm::Triple::xcore) {
TC = new toolchains::XCore(*this, Target, Args);
break;
}
TC = new toolchains::Generic_GCC(*this, Target, Args);
break;
}
}
return *TC;
}
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
// Check if the user requested no clang, or if clang doesn't understand this
// type (we only handle single inputs for now).
if (JA.size() != 1 ||
!types::isAcceptedByClang((*JA.begin())->getType()))
return false;
// Otherwise make sure this is an action clang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
!isa<CompileJobAction>(JA))
return false;
return true;
}
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the
/// grouped values as integers. Numbers which are not provided are set to 0.
///
/// \return True if the entire string was parsed (9.2), or all groups were
/// parsed (10.3.5extrastuff).
bool Driver::GetReleaseVersion(const char *Str, unsigned &Major,
unsigned &Minor, unsigned &Micro,
bool &HadExtra) {
HadExtra = false;
Major = Minor = Micro = 0;
if (*Str == '\0')
return true;
char *End;
Major = (unsigned) strtol(Str, &End, 10);
if (*Str != '\0' && *End == '\0')
return true;
if (*End != '.')
return false;
Str = End+1;
Minor = (unsigned) strtol(Str, &End, 10);
if (*Str != '\0' && *End == '\0')
return true;
if (*End != '.')
return false;
Str = End+1;
Micro = (unsigned) strtol(Str, &End, 10);
if (*Str != '\0' && *End == '\0')
return true;
if (Str == End)
return false;
HadExtra = true;
return true;
}
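A minimal, self-contained restatement of the grammar GetReleaseVersion accepts may help when reading the strtol chain above; this is a sketch for illustration, not the driver's code path:

// Standalone replica of the parsing loop in Driver::GetReleaseVersion.
#include <cassert>
#include <cstdlib>

static bool parseReleaseVersion(const char *Str, unsigned &Major,
                                unsigned &Minor, unsigned &Micro,
                                bool &HadExtra) {
  HadExtra = false;
  Major = Minor = Micro = 0;
  if (*Str == '\0') return true;                  // empty string parses as 0.0.0
  char *End;
  Major = (unsigned) std::strtol(Str, &End, 10);
  if (*Str != '\0' && *End == '\0') return true;  // "9"   -> 9.0.0
  if (*End != '.') return false;
  Str = End + 1;
  Minor = (unsigned) std::strtol(Str, &End, 10);
  if (*Str != '\0' && *End == '\0') return true;  // "9.2" -> 9.2.0
  if (*End != '.') return false;
  Str = End + 1;
  Micro = (unsigned) std::strtol(Str, &End, 10);
  if (*Str != '\0' && *End == '\0') return true;
  if (Str == End) return false;                   // "9.2." has no micro digits
  HadExtra = true;                                // e.g. "10.3.5extrastuff"
  return true;
}

int main() {
  unsigned Maj, Min, Mic; bool Extra;
  assert(parseReleaseVersion("9.2", Maj, Min, Mic, Extra) &&
         Maj == 9 && Min == 2 && Mic == 0 && !Extra);
  assert(parseReleaseVersion("10.3.5extrastuff", Maj, Min, Mic, Extra) &&
         Maj == 10 && Min == 3 && Mic == 5 && Extra);
  assert(!parseReleaseVersion("10.", Maj, Min, Mic, Extra));
}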
std::pair<unsigned, unsigned> Driver::getIncludeExcludeOptionFlagMasks() const {
unsigned IncludedFlagsBitmask = 0;
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
if (Mode == CLMode) {
// Include CL and Core options.
IncludedFlagsBitmask |= options::CLOption;
IncludedFlagsBitmask |= options::CoreOption;
} else {
ExcludedFlagsBitmask |= options::CLOption;
}
return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
+
+bool clang::driver::isOptimizationLevelFast(const llvm::opt::ArgList &Args) {
+ return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
+}
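The new helper leans on ArgList::hasFlag's "last flag wins" rule. A behavioral sketch follows (the command lines are hypothetical, and the getLastArg formulation is given for illustration only; the committed code uses hasFlag directly and assumes the includes already present in Driver.cpp):

// Equivalent formulation via getLastArg:
//   -Ofast           -> true
//   -O2 -Ofast       -> true  (-Ofast appears last)
//   -Ofast -O2       -> false (-O2, a member of O_Group, appears last)
//   no -O options    -> false (hasFlag's Default argument)
static bool isFastSketch(const llvm::opt::ArgList &Args) {
  if (llvm::opt::Arg *A =
          Args.getLastArg(options::OPT_Ofast, options::OPT_O_Group))
    return A->getOption().matches(options::OPT_Ofast);
  return false;
}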
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp (revision 265925)
@@ -1,450 +1,453 @@
//===--- ToolChain.cpp - Collections of tools for one platform ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Tools.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
const ArgList &A)
: D(D), Triple(T), Args(A) {
}
ToolChain::~ToolChain() {
}
const Driver &ToolChain::getDriver() const {
return D;
}
bool ToolChain::useIntegratedAs() const {
return Args.hasFlag(options::OPT_integrated_as,
options::OPT_no_integrated_as,
IsIntegratedAssemblerDefault());
}
const SanitizerArgs& ToolChain::getSanitizerArgs() const {
if (!SanitizerArguments.get())
SanitizerArguments.reset(new SanitizerArgs(*this, Args));
return *SanitizerArguments.get();
}
std::string ToolChain::getDefaultUniversalArchName() const {
// In universal driver terms, the arch name accepted by -arch isn't exactly
// the same as the ones that appear in the triple. Roughly speaking, this is
// an inverse of the darwin::getArchTypeForDarwinArchName() function, but the
// only interesting special case is powerpc.
switch (Triple.getArch()) {
case llvm::Triple::ppc:
return "ppc";
case llvm::Triple::ppc64:
return "ppc64";
case llvm::Triple::ppc64le:
return "ppc64le";
default:
return Triple.getArchName();
}
}
bool ToolChain::IsUnwindTablesDefault() const {
return false;
}
Tool *ToolChain::getClang() const {
if (!Clang)
Clang.reset(new tools::Clang(*this));
return Clang.get();
}
Tool *ToolChain::buildAssembler() const {
return new tools::ClangAs(*this);
}
Tool *ToolChain::buildLinker() const {
llvm_unreachable("Linking is not supported by this toolchain");
}
Tool *ToolChain::getAssemble() const {
if (!Assemble)
Assemble.reset(buildAssembler());
return Assemble.get();
}
Tool *ToolChain::getClangAs() const {
if (!Assemble)
Assemble.reset(new tools::ClangAs(*this));
return Assemble.get();
}
Tool *ToolChain::getLink() const {
if (!Link)
Link.reset(buildLinker());
return Link.get();
}
Tool *ToolChain::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::AssembleJobClass:
return getAssemble();
case Action::LinkJobClass:
return getLink();
case Action::InputClass:
case Action::BindArchClass:
case Action::LipoJobClass:
case Action::DsymutilJobClass:
case Action::VerifyJobClass:
llvm_unreachable("Invalid tool kind.");
case Action::CompileJobClass:
case Action::PrecompileJobClass:
case Action::PreprocessJobClass:
case Action::AnalyzeJobClass:
case Action::MigrateJobClass:
return getClang();
}
llvm_unreachable("Invalid tool kind.");
}
Tool *ToolChain::SelectTool(const JobAction &JA) const {
if (getDriver().ShouldUseClangCompiler(JA))
return getClang();
Action::ActionClass AC = JA.getKind();
if (AC == Action::AssembleJobClass && useIntegratedAs())
return getClangAs();
return getTool(AC);
}
std::string ToolChain::GetFilePath(const char *Name) const {
return D.GetFilePath(Name, *this);
}
std::string ToolChain::GetProgramPath(const char *Name) const {
return D.GetProgramPath(Name, *this);
}
types::ID ToolChain::LookupTypeForExtension(const char *Ext) const {
return types::lookupTypeForExtension(Ext);
}
bool ToolChain::HasNativeLLVMSupport() const {
return false;
}
ObjCRuntime ToolChain::getDefaultObjCRuntime(bool isNonFragile) const {
return ObjCRuntime(isNonFragile ? ObjCRuntime::GNUstep : ObjCRuntime::GCC,
VersionTuple());
}
/// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
//
// FIXME: tblgen this.
static const char *getARMTargetCPU(const ArgList &Args,
const llvm::Triple &Triple) {
// For Darwin targets, the -arch option (which is translated to a
// corresponding -march option) should determine the architecture
// (and the Mach-O slice) regardless of any -mcpu options.
if (!Triple.isOSDarwin()) {
// FIXME: Warn on inconsistent use of -mcpu and -march.
// If we have -mcpu=, use that.
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
}
StringRef MArch;
if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
// Otherwise, if we have -march= choose the base CPU for that arch.
MArch = A->getValue();
} else {
// Otherwise, use the Arch from the triple.
MArch = Triple.getArchName();
}
if (Triple.getOS() == llvm::Triple::NetBSD) {
if (MArch == "armv6")
return "arm1176jzf-s";
}
const char *result = llvm::StringSwitch<const char *>(MArch)
.Cases("armv2", "armv2a","arm2")
.Case("armv3", "arm6")
.Case("armv3m", "arm7m")
.Case("armv4", "strongarm")
.Case("armv4t", "arm7tdmi")
.Cases("armv5", "armv5t", "arm10tdmi")
.Cases("armv5e", "armv5te", "arm1026ejs")
.Case("armv5tej", "arm926ej-s")
.Cases("armv6", "armv6k", "arm1136jf-s")
.Case("armv6j", "arm1136j-s")
.Cases("armv6z", "armv6zk", "arm1176jzf-s")
.Case("armv6t2", "arm1156t2-s")
.Cases("armv6m", "armv6-m", "cortex-m0")
.Cases("armv7", "armv7a", "armv7-a", "cortex-a8")
.Cases("armv7l", "armv7-l", "cortex-a8")
.Cases("armv7f", "armv7-f", "cortex-a9-mp")
.Cases("armv7s", "armv7-s", "swift")
.Cases("armv7r", "armv7-r", "cortex-r4")
.Cases("armv7m", "armv7-m", "cortex-m3")
.Cases("armv7em", "armv7e-m", "cortex-m4")
.Cases("armv8", "armv8a", "armv8-a", "cortex-a53")
.Case("ep9312", "ep9312")
.Case("iwmmxt", "iwmmxt")
.Case("xscale", "xscale")
// If all else failed, return the most basic CPU with thumb interworking
// supported by LLVM.
.Default(0);
if (result)
return result;
return
Triple.getEnvironment() == llvm::Triple::GNUEABIHF
? "arm1176jzf-s"
: "arm7tdmi";
}
/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
/// CPU.
//
// FIXME: This is redundant with -mcpu, why does LLVM use this.
// FIXME: tblgen this, or kill it!
static const char *getLLVMArchSuffixForARM(StringRef CPU) {
return llvm::StringSwitch<const char *>(CPU)
.Case("strongarm", "v4")
.Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "v4t")
.Cases("arm720t", "arm9", "arm9tdmi", "v4t")
.Cases("arm920", "arm920t", "arm922t", "v4t")
.Cases("arm940t", "ep9312","v4t")
.Cases("arm10tdmi", "arm1020t", "v5")
.Cases("arm9e", "arm926ej-s", "arm946e-s", "v5e")
.Cases("arm966e-s", "arm968e-s", "arm10e", "v5e")
.Cases("arm1020e", "arm1022e", "xscale", "iwmmxt", "v5e")
.Cases("arm1136j-s", "arm1136jf-s", "arm1176jz-s", "v6")
.Cases("arm1176jzf-s", "mpcorenovfp", "mpcore", "v6")
.Cases("arm1156t2-s", "arm1156t2f-s", "v6t2")
.Cases("cortex-a5", "cortex-a7", "cortex-a8", "v7")
.Cases("cortex-a9", "cortex-a12", "cortex-a15", "v7")
.Cases("cortex-r4", "cortex-r5", "v7r")
.Case("cortex-m0", "v6m")
.Case("cortex-m3", "v7m")
.Case("cortex-m4", "v7em")
.Case("cortex-a9-mp", "v7f")
.Case("swift", "v7s")
.Cases("cortex-a53", "cortex-a57", "v8")
.Default("");
}
std::string ToolChain::ComputeLLVMTriple(const ArgList &Args,
types::ID InputType) const {
switch (getTriple().getArch()) {
default:
return getTripleString();
case llvm::Triple::x86_64: {
llvm::Triple Triple = getTriple();
if (!Triple.isOSDarwin())
return getTripleString();
if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
// x86_64h goes in the triple. Other -march options just use the
// vanilla triple we already have.
StringRef MArch = A->getValue();
if (MArch == "x86_64h")
Triple.setArchName(MArch);
}
return Triple.getTriple();
}
case llvm::Triple::arm:
case llvm::Triple::thumb: {
// FIXME: Factor into subclasses.
llvm::Triple Triple = getTriple();
// Thumb2 is the default for V7 on Darwin.
//
// FIXME: Thumb should just be another -target-feature, not in the triple.
StringRef Suffix =
getLLVMArchSuffixForARM(getARMTargetCPU(Args, Triple));
bool ThumbDefault = Suffix.startswith("v6m") ||
(Suffix.startswith("v7") && getTriple().isOSDarwin());
std::string ArchName = "arm";
// Assembly files should start in ARM mode.
if (InputType != types::TY_PP_Asm &&
Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, ThumbDefault))
ArchName = "thumb";
Triple.setArchName(ArchName + Suffix.str());
return Triple.getTriple();
}
}
}
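A worked trace of the ARM branch above, assuming a hypothetical 'armv7-apple-darwin' target with no -mcpu/-march/-mthumb flags:

// getARMTargetCPU: no -mcpu/-march on Darwin -> MArch = "armv7" -> "cortex-a8"
// getLLVMArchSuffixForARM("cortex-a8")       -> "v7"
// ThumbDefault: suffix starts with "v7" and OS is Darwin -> true
//   C input (not TY_PP_Asm)  -> arch "thumbv7" -> "thumbv7-apple-darwin"
//   assembly input (.s file) -> stays "armv7-apple-darwin" (ARM mode)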
std::string ToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
// Diagnose use of Darwin OS deployment target arguments on non-Darwin.
if (Arg *A = Args.getLastArg(options::OPT_mmacosx_version_min_EQ,
options::OPT_miphoneos_version_min_EQ,
options::OPT_mios_simulator_version_min_EQ))
getDriver().Diag(diag::err_drv_clang_unsupported)
<< A->getAsString(Args);
return ComputeLLVMTriple(Args, InputType);
}
void ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Each toolchain should provide the appropriate include flags.
}
void ToolChain::addClangTargetOptions(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
}
ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType(
const ArgList &Args) const
{
if (Arg *A = Args.getLastArg(options::OPT_rtlib_EQ)) {
StringRef Value = A->getValue();
if (Value == "compiler-rt")
return ToolChain::RLT_CompilerRT;
if (Value == "libgcc")
return ToolChain::RLT_Libgcc;
getDriver().Diag(diag::err_drv_invalid_rtlib_name)
<< A->getAsString(Args);
}
return GetDefaultRuntimeLibType();
}
ToolChain::CXXStdlibType ToolChain::GetCXXStdlibType(const ArgList &Args) const{
if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
StringRef Value = A->getValue();
if (Value == "libc++")
return ToolChain::CST_Libcxx;
if (Value == "libstdc++")
return ToolChain::CST_Libstdcxx;
getDriver().Diag(diag::err_drv_invalid_stdlib_name)
<< A->getAsString(Args);
}
return ToolChain::CST_Libstdcxx;
}
/// \brief Utility function to add a system include directory to CC1 arguments.
/*static*/ void ToolChain::addSystemInclude(const ArgList &DriverArgs,
ArgStringList &CC1Args,
const Twine &Path) {
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(Path));
}
/// \brief Utility function to add a system include directory with extern "C"
/// semantics to CC1 arguments.
///
/// Note that this should be used rarely, and only for directories that
/// historically and for legacy reasons are treated as having implicit extern
/// "C" semantics. These semantics are *ignored* by and large today, but its
/// important to preserve the preprocessor changes resulting from the
/// classification.
/*static*/ void ToolChain::addExternCSystemInclude(const ArgList &DriverArgs,
ArgStringList &CC1Args,
const Twine &Path) {
CC1Args.push_back("-internal-externc-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(Path));
}
void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs,
ArgStringList &CC1Args,
const Twine &Path) {
if (llvm::sys::fs::exists(Path))
addExternCSystemInclude(DriverArgs, CC1Args, Path);
}
/// \brief Utility function to add a list of system include directories to CC1.
/*static*/ void ToolChain::addSystemIncludes(const ArgList &DriverArgs,
ArgStringList &CC1Args,
ArrayRef<StringRef> Paths) {
for (ArrayRef<StringRef>::iterator I = Paths.begin(), E = Paths.end();
I != E; ++I) {
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(*I));
}
}
void ToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Header search paths should be handled by each of the subclasses.
// Historically, they have not been, and instead have been handled inside of
// the CC1-layer frontend. As the logic is hoisted out, this generic function
// will slowly stop being called.
//
// While it is being called, replicate a bit of a hack to propagate the
// '-stdlib=' flag down to CC1 so that it can in turn customize the C++
// header search paths with it. Once all systems are overriding this
// function, the CC1 flag and this line can be removed.
DriverArgs.AddAllArgs(CC1Args, options::OPT_stdlib_EQ);
}
void ToolChain::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CXXStdlibType Type = GetCXXStdlibType(Args);
switch (Type) {
case ToolChain::CST_Libcxx:
CmdArgs.push_back("-lc++");
break;
case ToolChain::CST_Libstdcxx:
CmdArgs.push_back("-lstdc++");
break;
}
}
void ToolChain::AddCCKextLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-lcc_kext");
}
bool ToolChain::AddFastMathRuntimeIfAvailable(const ArgList &Args,
ArgStringList &CmdArgs) const {
- // Check if -ffast-math or -funsafe-math is enabled.
- Arg *A = Args.getLastArg(options::OPT_ffast_math,
- options::OPT_fno_fast_math,
- options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations);
+ // Do not check for -fno-fast-math or -fno-unsafe-math when -Ofast is passed
+ // (to keep the linker options consistent with gcc and clang itself).
+ if (!isOptimizationLevelFast(Args)) {
+ // Check if -ffast-math or -funsafe-math is enabled.
+ Arg *A =
+ Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
+ options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations);
- if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
- A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
- return false;
-
+ if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
+ A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
+ return false;
+ }
// If crtfastmath.o exists add it to the arguments.
std::string Path = GetFilePath("crtfastmath.o");
if (Path == "crtfastmath.o") // Not found.
return false;
CmdArgs.push_back(Args.MakeArgString(Path));
return true;
}
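The net effect of the change above, sketched with hypothetical command lines (assuming crtfastmath.o is found on the file path):

// clang -ffast-math foo.c                -> crtfastmath.o added
// clang -ffast-math -fno-fast-math foo.c -> not added (last flag wins)
// clang -Ofast -fno-fast-math foo.c      -> still added: with -Ofast the
//     -fno-* check is skipped, matching gcc and clang's own behavior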
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp (revision 265925)
@@ -1,2930 +1,2932 @@
//===--- ToolChains.cpp - ToolChain Implementations -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "ToolChains.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/Program.h"
// FIXME: This needs to be listed last until we fix the broken include guards
// in these files and the LLVM config.h files.
#include "clang/Config/config.h" // for GCC_INSTALL_PREFIX
#include <cstdlib> // ::getenv
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
/// Darwin - Darwin tool chain for i386 and x86_64.
Darwin::Darwin(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: ToolChain(D, Triple, Args), TargetInitialized(false)
{
// Compute the initial Darwin version from the triple
unsigned Major, Minor, Micro;
if (!Triple.getMacOSXVersion(Major, Minor, Micro))
getDriver().Diag(diag::err_drv_invalid_darwin_version) <<
Triple.getOSName();
llvm::raw_string_ostream(MacosxVersionMin)
<< Major << '.' << Minor << '.' << Micro;
// FIXME: DarwinVersion is only used to find GCC's libexec directory.
// It should be removed when we stop supporting that.
DarwinVersion[0] = Minor + 4;
DarwinVersion[1] = Micro;
DarwinVersion[2] = 0;
// Compute the initial iOS version from the triple
Triple.getiOSVersion(Major, Minor, Micro);
llvm::raw_string_ostream(iOSVersionMin)
<< Major << '.' << Minor << '.' << Micro;
}
types::ID Darwin::LookupTypeForExtension(const char *Ext) const {
types::ID Ty = types::lookupTypeForExtension(Ext);
// Darwin always preprocesses assembly files (unless -x is used explicitly).
if (Ty == types::TY_PP_Asm)
return types::TY_Asm;
return Ty;
}
bool Darwin::HasNativeLLVMSupport() const {
return true;
}
/// Darwin provides an ARC runtime starting in MacOS X 10.7 and iOS 5.0.
ObjCRuntime Darwin::getDefaultObjCRuntime(bool isNonFragile) const {
if (isTargetIPhoneOS())
return ObjCRuntime(ObjCRuntime::iOS, TargetVersion);
if (isNonFragile)
return ObjCRuntime(ObjCRuntime::MacOSX, TargetVersion);
return ObjCRuntime(ObjCRuntime::FragileMacOSX, TargetVersion);
}
/// Darwin provides a blocks runtime starting in MacOS X 10.6 and iOS 3.2.
bool Darwin::hasBlocksRuntime() const {
if (isTargetIPhoneOS())
return !isIPhoneOSVersionLT(3, 2);
else
return !isMacosxVersionLT(10, 6);
}
static const char *GetArmArchForMArch(StringRef Value) {
return llvm::StringSwitch<const char*>(Value)
.Case("armv6k", "armv6")
.Case("armv6m", "armv6m")
.Case("armv5tej", "armv5")
.Case("xscale", "xscale")
.Case("armv4t", "armv4t")
.Case("armv7", "armv7")
.Cases("armv7a", "armv7-a", "armv7")
.Cases("armv7r", "armv7-r", "armv7")
.Cases("armv7em", "armv7e-m", "armv7em")
.Cases("armv7f", "armv7-f", "armv7f")
.Cases("armv7k", "armv7-k", "armv7k")
.Cases("armv7m", "armv7-m", "armv7m")
.Cases("armv7s", "armv7-s", "armv7s")
.Default(0);
}
static const char *GetArmArchForMCpu(StringRef Value) {
return llvm::StringSwitch<const char *>(Value)
.Cases("arm9e", "arm946e-s", "arm966e-s", "arm968e-s", "arm926ej-s","armv5")
.Cases("arm10e", "arm10tdmi", "armv5")
.Cases("arm1020t", "arm1020e", "arm1022e", "arm1026ej-s", "armv5")
.Case("xscale", "xscale")
.Cases("arm1136j-s", "arm1136jf-s", "arm1176jz-s", "arm1176jzf-s", "armv6")
.Case("cortex-m0", "armv6m")
.Cases("cortex-a5", "cortex-a7", "cortex-a8", "armv7")
.Cases("cortex-a9", "cortex-a12", "cortex-a15", "armv7")
.Cases("cortex-r4", "cortex-r5", "armv7r")
.Case("cortex-a9-mp", "armv7f")
.Case("cortex-m3", "armv7m")
.Case("cortex-m4", "armv7em")
.Case("swift", "armv7s")
.Default(0);
}
StringRef Darwin::getDarwinArchName(const ArgList &Args) const {
switch (getTriple().getArch()) {
default:
return getArchName();
case llvm::Triple::thumb:
case llvm::Triple::arm: {
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
if (const char *Arch = GetArmArchForMArch(A->getValue()))
return Arch;
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
if (const char *Arch = GetArmArchForMCpu(A->getValue()))
return Arch;
return "arm";
}
}
}
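Worked examples of the flag precedence in getDarwinArchName for an ARM target (the flags shown are hypothetical):

// -march=armv7s (checked first)  -> "armv7s" via GetArmArchForMArch
// no -march, -mcpu=cortex-a8     -> "armv7"  via GetArmArchForMCpu
// neither flag                   -> "arm"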
Darwin::~Darwin() {
}
std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
// If the target isn't initialized (e.g., an unknown Darwin platform), return
// the default triple.
if (!isTargetInitialized())
return Triple.getTriple();
if (Triple.getArchName() == "thumbv6m" ||
Triple.getArchName() == "thumbv7m" ||
Triple.getArchName() == "thumbv7em") {
// OS is ios or macosx unless the arch is v6m, v7m, or v7em.
Triple.setOS(llvm::Triple::Darwin);
Triple.setEnvironment(llvm::Triple::EABI);
} else {
SmallString<16> Str;
Str += isTargetIPhoneOS() ? "ios" : "macosx";
Str += getTargetVersion().getAsString();
Triple.setOSName(Str);
}
return Triple.getTriple();
}
void Generic_ELF::anchor() {}
Tool *Darwin::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::LipoJobClass:
if (!Lipo)
Lipo.reset(new tools::darwin::Lipo(*this));
return Lipo.get();
case Action::DsymutilJobClass:
if (!Dsymutil)
Dsymutil.reset(new tools::darwin::Dsymutil(*this));
return Dsymutil.get();
case Action::VerifyJobClass:
if (!VerifyDebug)
VerifyDebug.reset(new tools::darwin::VerifyDebug(*this));
return VerifyDebug.get();
default:
return ToolChain::getTool(AC);
}
}
Tool *Darwin::buildLinker() const {
return new tools::darwin::Link(*this);
}
Tool *Darwin::buildAssembler() const {
return new tools::darwin::Assemble(*this);
}
DarwinClang::DarwinClang(const Driver &D, const llvm::Triple& Triple,
const ArgList &Args)
: Darwin(D, Triple, Args)
{
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
// We expect 'as', 'ld', etc. to be adjacent to our install dir.
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
}
void DarwinClang::AddLinkARCArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-force_load");
SmallString<128> P(getDriver().ClangExecutable);
llvm::sys::path::remove_filename(P); // 'clang'
llvm::sys::path::remove_filename(P); // 'bin'
llvm::sys::path::append(P, "lib", "arc", "libarclite_");
// Mash in the platform.
if (isTargetIOSSimulator())
P += "iphonesimulator";
else if (isTargetIPhoneOS())
P += "iphoneos";
else
P += "macosx";
P += ".a";
CmdArgs.push_back(Args.MakeArgString(P));
}
void DarwinClang::AddLinkRuntimeLib(const ArgList &Args,
ArgStringList &CmdArgs,
const char *DarwinStaticLib,
bool AlwaysLink) const {
SmallString<128> P(getDriver().ResourceDir);
llvm::sys::path::append(P, "lib", "darwin", DarwinStaticLib);
// For now, allow missing resource libraries to support developers who may
// not have compiler-rt checked out or integrated into their build (unless
// we explicitly force linking with this library).
if (AlwaysLink || llvm::sys::fs::exists(P.str()))
CmdArgs.push_back(Args.MakeArgString(P.str()));
}
void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Darwin only supports the compiler-rt based runtime libraries.
switch (GetRuntimeLibType(Args)) {
case ToolChain::RLT_CompilerRT:
break;
default:
getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
<< Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "darwin";
return;
}
// Darwin doesn't support real static executables, don't link any runtime
// libraries with -static.
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_fapple_kext) ||
Args.hasArg(options::OPT_mkernel))
return;
// Reject -static-libgcc for now; we can deal with this when and if someone
// cares. This is useful in situations where someone wants to statically link
// something like libstdc++, and needs its runtime support routines.
if (const Arg *A = Args.getLastArg(options::OPT_static_libgcc)) {
getDriver().Diag(diag::err_drv_unsupported_opt)
<< A->getAsString(Args);
return;
}
// If we are building profile support, link that library in.
if (Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_fprofile_generate) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_coverage)) {
// Select the appropriate runtime library for the target.
if (isTargetIPhoneOS()) {
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.profile_ios.a");
} else {
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.profile_osx.a");
}
}
const SanitizerArgs &Sanitize = getSanitizerArgs();
// Add Ubsan runtime library, if required.
if (Sanitize.needsUbsanRt()) {
// FIXME: Move this check to SanitizerArgs::filterUnsupportedKinds.
if (isTargetIPhoneOS()) {
getDriver().Diag(diag::err_drv_clang_unsupported_per_platform)
<< "-fsanitize=undefined";
} else {
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.ubsan_osx.a", true);
// The Ubsan runtime library requires C++.
AddCXXStdlibLibArgs(Args, CmdArgs);
}
}
// Add ASAN runtime library, if required. Dynamic libraries and bundles
// should not be linked with the runtime library.
if (Sanitize.needsAsanRt()) {
// FIXME: Move this check to SanitizerArgs::filterUnsupportedKinds.
if (isTargetIPhoneOS() && !isTargetIOSSimulator()) {
getDriver().Diag(diag::err_drv_clang_unsupported_per_platform)
<< "-fsanitize=address";
} else {
if (!Args.hasArg(options::OPT_dynamiclib) &&
!Args.hasArg(options::OPT_bundle)) {
// The ASAN runtime library requires C++.
AddCXXStdlibLibArgs(Args, CmdArgs);
}
if (isTargetMacOS()) {
AddLinkRuntimeLib(Args, CmdArgs,
"libclang_rt.asan_osx_dynamic.dylib",
true);
} else {
if (isTargetIOSSimulator()) {
AddLinkRuntimeLib(Args, CmdArgs,
"libclang_rt.asan_iossim_dynamic.dylib",
true);
}
}
}
}
// Otherwise link libSystem, then the dynamic runtime library, and finally any
// target specific static runtime library.
CmdArgs.push_back("-lSystem");
// Select the dynamic runtime library and the target specific static library.
if (isTargetIPhoneOS()) {
// If we are compiling as iOS / simulator, don't attempt to link libgcc_s.1;
// it never went into the SDK.
// Linking against libgcc_s.1 isn't needed for iOS 5.0+
if (isIPhoneOSVersionLT(5, 0) && !isTargetIOSSimulator())
CmdArgs.push_back("-lgcc_s.1");
// We currently always need a static runtime library for iOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.ios.a");
} else {
// The dynamic runtime library was merged with libSystem for 10.6 and
// beyond; only 10.4 and 10.5 need an additional runtime library.
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-lgcc_s.10.4");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lgcc_s.10.5");
// For OS X, we thought we would only need a static runtime library when
// targeting 10.4, to provide versions of the static functions which were
// omitted from 10.4.dylib.
//
// Unfortunately, that turned out to not be true, because Darwin system
// headers can still use eprintf on i386, and it is not exported from
// libSystem. Therefore, we still must provide a runtime library just for
// the tiny tiny handful of projects that *might* use that symbol.
if (isMacosxVersionLT(10, 5)) {
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.10.4.a");
} else {
if (getTriple().getArch() == llvm::Triple::x86)
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.eprintf.a");
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.osx.a");
}
}
}
void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
const OptTable &Opts = getDriver().getOpts();
// Support allowing the SDKROOT environment variable used by xcrun and other
// Xcode tools to define the default sysroot, by making it the default for
// isysroot.
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
// Warn if the path does not exist.
if (!llvm::sys::fs::exists(A->getValue()))
getDriver().Diag(clang::diag::warn_missing_sysroot) << A->getValue();
} else {
if (char *env = ::getenv("SDKROOT")) {
// We only use this value as the default if it is an absolute path,
// exists, and it is not the root path.
if (llvm::sys::path::is_absolute(env) && llvm::sys::fs::exists(env) &&
StringRef(env) != "/") {
Args.append(Args.MakeSeparateArg(
0, Opts.getOption(options::OPT_isysroot), env));
}
}
}
Arg *OSXVersion = Args.getLastArg(options::OPT_mmacosx_version_min_EQ);
Arg *iOSVersion = Args.getLastArg(options::OPT_miphoneos_version_min_EQ);
Arg *iOSSimVersion = Args.getLastArg(
options::OPT_mios_simulator_version_min_EQ);
if (OSXVersion && (iOSVersion || iOSSimVersion)) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< OSXVersion->getAsString(Args)
<< (iOSVersion ? iOSVersion : iOSSimVersion)->getAsString(Args);
iOSVersion = iOSSimVersion = 0;
} else if (iOSVersion && iOSSimVersion) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< iOSVersion->getAsString(Args)
<< iOSSimVersion->getAsString(Args);
iOSSimVersion = 0;
} else if (!OSXVersion && !iOSVersion && !iOSSimVersion) {
// If no deployment target was specified on the command line, check for
// environment defines.
StringRef OSXTarget;
StringRef iOSTarget;
StringRef iOSSimTarget;
if (char *env = ::getenv("MACOSX_DEPLOYMENT_TARGET"))
OSXTarget = env;
if (char *env = ::getenv("IPHONEOS_DEPLOYMENT_TARGET"))
iOSTarget = env;
if (char *env = ::getenv("IOS_SIMULATOR_DEPLOYMENT_TARGET"))
iOSSimTarget = env;
// If no '-miphoneos-version-min' is specified on the command line and
// IPHONEOS_DEPLOYMENT_TARGET is not defined, see if we can set the default
// based on -isysroot.
if (iOSTarget.empty()) {
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
StringRef first, second;
StringRef isysroot = A->getValue();
llvm::tie(first, second) = isysroot.split(StringRef("SDKs/iPhoneOS"));
if (second != "")
iOSTarget = second.substr(0,3);
}
}
// If no OSX or iOS target has been specified and we're compiling for armv7,
// go ahead and assume we're targeting iOS.
if (OSXTarget.empty() && iOSTarget.empty() &&
(getDarwinArchName(Args) == "armv7" ||
getDarwinArchName(Args) == "armv7s"))
iOSTarget = iOSVersionMin;
// Handle conflicting deployment targets
//
// FIXME: Don't hardcode default here.
// Do not allow conflicts with the iOS simulator target.
if (!iOSSimTarget.empty() && (!OSXTarget.empty() || !iOSTarget.empty())) {
getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
<< "IOS_SIMULATOR_DEPLOYMENT_TARGET"
<< (!OSXTarget.empty() ? "MACOSX_DEPLOYMENT_TARGET" :
"IPHONEOS_DEPLOYMENT_TARGET");
}
// Allow conflicts among OSX and iOS for historical reasons, but choose the
// default platform.
if (!OSXTarget.empty() && !iOSTarget.empty()) {
if (getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::thumb)
OSXTarget = "";
else
iOSTarget = "";
}
if (!OSXTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mmacosx_version_min_EQ);
OSXVersion = Args.MakeJoinedArg(0, O, OSXTarget);
Args.append(OSXVersion);
} else if (!iOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_miphoneos_version_min_EQ);
iOSVersion = Args.MakeJoinedArg(0, O, iOSTarget);
Args.append(iOSVersion);
} else if (!iOSSimTarget.empty()) {
const Option O = Opts.getOption(
options::OPT_mios_simulator_version_min_EQ);
iOSSimVersion = Args.MakeJoinedArg(0, O, iOSSimTarget);
Args.append(iOSSimVersion);
} else {
// Otherwise, assume we are targeting OS X.
const Option O = Opts.getOption(options::OPT_mmacosx_version_min_EQ);
OSXVersion = Args.MakeJoinedArg(0, O, MacosxVersionMin);
Args.append(OSXVersion);
}
}
// Reject invalid architecture combinations.
if (iOSSimVersion && (getTriple().getArch() != llvm::Triple::x86 &&
getTriple().getArch() != llvm::Triple::x86_64)) {
getDriver().Diag(diag::err_drv_invalid_arch_for_deployment_target)
<< getTriple().getArchName() << iOSSimVersion->getAsString(Args);
}
// Set the tool chain target information.
unsigned Major, Minor, Micro;
bool HadExtra;
if (OSXVersion) {
assert((!iOSVersion && !iOSSimVersion) && "Unknown target platform!");
if (!Driver::GetReleaseVersion(OSXVersion->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
Major != 10 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< OSXVersion->getAsString(Args);
} else {
const Arg *Version = iOSVersion ? iOSVersion : iOSSimVersion;
assert(Version && "Unknown target platform!");
if (!Driver::GetReleaseVersion(Version->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
Major >= 10 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< Version->getAsString(Args);
}
bool IsIOSSim = bool(iOSSimVersion);
// In GCC, the simulator historically was treated as being OS X in some
// contexts, like determining the link logic, despite generally being called
// with an iOS deployment target. For compatibility, we detect the
// simulator as iOS + x86, and treat it differently in a few contexts.
if (iOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
IsIOSSim = true;
setTarget(/*IsIPhoneOS=*/ !OSXVersion, Major, Minor, Micro, IsIOSSim);
}
void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CXXStdlibType Type = GetCXXStdlibType(Args);
switch (Type) {
case ToolChain::CST_Libcxx:
CmdArgs.push_back("-lc++");
break;
case ToolChain::CST_Libstdcxx: {
// Unfortunately, -lstdc++ doesn't always exist in the standard search path;
// it was previously found in the gcc lib dir. However, for all the Darwin
// platforms we care about it was -lstdc++.6, so we search for that
// explicitly if we can't see an obvious -lstdc++ candidate.
// Check in the sysroot first.
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
SmallString<128> P(A->getValue());
llvm::sys::path::append(P, "usr", "lib", "libstdc++.dylib");
if (!llvm::sys::fs::exists(P.str())) {
llvm::sys::path::remove_filename(P);
llvm::sys::path::append(P, "libstdc++.6.dylib");
if (llvm::sys::fs::exists(P.str())) {
CmdArgs.push_back(Args.MakeArgString(P.str()));
return;
}
}
}
// Otherwise, look in the root.
// FIXME: This should be removed someday when we don't have to care about
// 10.6 and earlier, where /usr/lib/libstdc++.dylib does not exist.
if (!llvm::sys::fs::exists("/usr/lib/libstdc++.dylib") &&
llvm::sys::fs::exists("/usr/lib/libstdc++.6.dylib")) {
CmdArgs.push_back("/usr/lib/libstdc++.6.dylib");
return;
}
// Otherwise, let the linker search.
CmdArgs.push_back("-lstdc++");
break;
}
}
}
void DarwinClang::AddCCKextLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// For Darwin platforms, use the compiler-rt-based support library
// instead of the gcc-provided one (which is also incidentally
// only present in the gcc lib dir, which makes it hard to find).
SmallString<128> P(getDriver().ResourceDir);
llvm::sys::path::append(P, "lib", "darwin");
// Use the newer cc_kext for iOS ARM after 6.0.
if (!isTargetIPhoneOS() || isTargetIOSSimulator() ||
!isIPhoneOSVersionLT(6, 0)) {
llvm::sys::path::append(P, "libclang_rt.cc_kext.a");
} else {
llvm::sys::path::append(P, "libclang_rt.cc_kext_ios5.a");
}
// For now, allow missing resource libraries to support developers who may
// not have compiler-rt checked out or integrated into their build.
if (llvm::sys::fs::exists(P.str()))
CmdArgs.push_back(Args.MakeArgString(P.str()));
}
DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args,
const char *BoundArch) const {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// FIXME: We really want to get out of the tool chain level argument
// translation business, as it makes the driver functionality much
// more opaque. For now, we follow gcc closely solely for the
// purpose of easily achieving feature parity & testability. Once we
// have something that works, we should reevaluate each translation
// and try to push it down into tool specific logic.
for (ArgList::const_iterator it = Args.begin(),
ie = Args.end(); it != ie; ++it) {
Arg *A = *it;
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches either the toolchain
// triple arch, or the arch being bound.
llvm::Triple::ArchType XarchArch =
tools::darwin::getArchTypeForDarwinArchName(A->getValue(0));
if (!(XarchArch == getArch() ||
(BoundArch && XarchArch ==
tools::darwin::getArchTypeForDarwinArchName(BoundArch))))
continue;
Arg *OriginalArg = A;
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
unsigned Prev = Index;
Arg *XarchArg = Opts.ParseOneArg(Args, Index);
// If the argument parsing failed or more than one argument was
// consumed, the -Xarch_ argument's parameter tried to consume
// extra arguments. Emit an error and ignore.
//
// We also want to disallow any options which would alter the
// driver behavior; that isn't going to work in our model. We
// use isDriverOption() as an approximation, although things
// like -O4 are going to slip through.
if (!XarchArg || Index > Prev + 1) {
getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
<< A->getAsString(Args);
continue;
} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
<< A->getAsString(Args);
continue;
}
XarchArg->setBaseArg(A);
A = XarchArg;
DAL->AddSynthesizedArg(A);
// Linker input arguments require custom handling. The problem is that we
// have already constructed the phase actions, so we can not treat them as
// "input arguments".
if (A->getOption().hasFlag(options::LinkerInput)) {
// Convert the argument into individual Zlinker_input_args.
for (unsigned i = 0, e = A->getNumValues(); i != e; ++i) {
DAL->AddSeparateArg(OriginalArg,
Opts.getOption(options::OPT_Zlinker_input),
A->getValue(i));
}
continue;
}
}
// Sob. This is strictly gcc compatible for the time being. Apple
// gcc translates options twice, which means that self-expanding
// options add duplicates.
switch ((options::ID) A->getOption().getID()) {
default:
DAL->append(A);
break;
case options::OPT_mkernel:
case options::OPT_fapple_kext:
DAL->append(A);
DAL->AddFlagArg(A, Opts.getOption(options::OPT_static));
break;
case options::OPT_dependency_file:
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF),
A->getValue());
break;
case options::OPT_gfull:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
DAL->AddFlagArg(A,
Opts.getOption(options::OPT_fno_eliminate_unused_debug_symbols));
break;
case options::OPT_gused:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
DAL->AddFlagArg(A,
Opts.getOption(options::OPT_feliminate_unused_debug_symbols));
break;
case options::OPT_shared:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_dynamiclib));
break;
case options::OPT_fconstant_cfstrings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mconstant_cfstrings));
break;
case options::OPT_fno_constant_cfstrings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mno_constant_cfstrings));
break;
case options::OPT_Wnonportable_cfstrings:
DAL->AddFlagArg(A,
Opts.getOption(options::OPT_mwarn_nonportable_cfstrings));
break;
case options::OPT_Wno_nonportable_cfstrings:
DAL->AddFlagArg(A,
Opts.getOption(options::OPT_mno_warn_nonportable_cfstrings));
break;
case options::OPT_fpascal_strings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mpascal_strings));
break;
case options::OPT_fno_pascal_strings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mno_pascal_strings));
break;
}
}
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64)
if (!Args.hasArgNoClaim(options::OPT_mtune_EQ))
DAL->AddJoinedArg(0, Opts.getOption(options::OPT_mtune_EQ), "core2");
// Add the arch options based on the particular spelling of -arch, to match
// how the driver driver works.
if (BoundArch) {
StringRef Name = BoundArch;
const Option MCpu = Opts.getOption(options::OPT_mcpu_EQ);
const Option MArch = Opts.getOption(options::OPT_march_EQ);
// This code must be kept in sync with LLVM's getArchTypeForDarwinArch,
// which defines the list of which architectures we accept.
if (Name == "ppc")
;
else if (Name == "ppc601")
DAL->AddJoinedArg(0, MCpu, "601");
else if (Name == "ppc603")
DAL->AddJoinedArg(0, MCpu, "603");
else if (Name == "ppc604")
DAL->AddJoinedArg(0, MCpu, "604");
else if (Name == "ppc604e")
DAL->AddJoinedArg(0, MCpu, "604e");
else if (Name == "ppc750")
DAL->AddJoinedArg(0, MCpu, "750");
else if (Name == "ppc7400")
DAL->AddJoinedArg(0, MCpu, "7400");
else if (Name == "ppc7450")
DAL->AddJoinedArg(0, MCpu, "7450");
else if (Name == "ppc970")
DAL->AddJoinedArg(0, MCpu, "970");
else if (Name == "ppc64" || Name == "ppc64le")
DAL->AddFlagArg(0, Opts.getOption(options::OPT_m64));
else if (Name == "i386")
;
else if (Name == "i486")
DAL->AddJoinedArg(0, MArch, "i486");
else if (Name == "i586")
DAL->AddJoinedArg(0, MArch, "i586");
else if (Name == "i686")
DAL->AddJoinedArg(0, MArch, "i686");
else if (Name == "pentium")
DAL->AddJoinedArg(0, MArch, "pentium");
else if (Name == "pentium2")
DAL->AddJoinedArg(0, MArch, "pentium2");
else if (Name == "pentpro")
DAL->AddJoinedArg(0, MArch, "pentiumpro");
else if (Name == "pentIIm3")
DAL->AddJoinedArg(0, MArch, "pentium2");
else if (Name == "x86_64")
DAL->AddFlagArg(0, Opts.getOption(options::OPT_m64));
else if (Name == "x86_64h") {
DAL->AddFlagArg(0, Opts.getOption(options::OPT_m64));
DAL->AddJoinedArg(0, MArch, "x86_64h");
}
else if (Name == "arm")
DAL->AddJoinedArg(0, MArch, "armv4t");
else if (Name == "armv4t")
DAL->AddJoinedArg(0, MArch, "armv4t");
else if (Name == "armv5")
DAL->AddJoinedArg(0, MArch, "armv5tej");
else if (Name == "xscale")
DAL->AddJoinedArg(0, MArch, "xscale");
else if (Name == "armv6")
DAL->AddJoinedArg(0, MArch, "armv6k");
else if (Name == "armv6m")
DAL->AddJoinedArg(0, MArch, "armv6m");
else if (Name == "armv7")
DAL->AddJoinedArg(0, MArch, "armv7a");
else if (Name == "armv7em")
DAL->AddJoinedArg(0, MArch, "armv7em");
else if (Name == "armv7f")
DAL->AddJoinedArg(0, MArch, "armv7f");
else if (Name == "armv7k")
DAL->AddJoinedArg(0, MArch, "armv7k");
else if (Name == "armv7m")
DAL->AddJoinedArg(0, MArch, "armv7m");
else if (Name == "armv7s")
DAL->AddJoinedArg(0, MArch, "armv7s");
else
llvm_unreachable("invalid Darwin arch");
}
// Add an explicit version min argument for the deployment target. We do this
// after argument translation because -Xarch_ arguments may add a version min
// argument.
if (BoundArch)
AddDeploymentTarget(*DAL);
// For iOS 6, undo the translation to add -static for -mkernel/-fapple-kext.
// FIXME: It would be far better to avoid inserting those -static arguments,
// but we can't check the deployment target in the translation code until
// it is set here.
if (isTargetIPhoneOS() && !isIPhoneOSVersionLT(6, 0)) {
for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie; ) {
Arg *A = *it;
++it;
if (A->getOption().getID() != options::OPT_mkernel &&
A->getOption().getID() != options::OPT_fapple_kext)
continue;
assert(it != ie && "unexpected argument translation");
A = *it;
assert(A->getOption().getID() == options::OPT_static &&
"missing expected -static argument");
it = DAL->getArgs().erase(it);
}
}
// Default to use libc++ on OS X 10.9+ and iOS 7+.
if (((isTargetMacOS() && !isMacosxVersionLT(10, 9)) ||
(isTargetIPhoneOS() && !isIPhoneOSVersionLT(7, 0))) &&
!Args.getLastArg(options::OPT_stdlib_EQ))
DAL->AddJoinedArg(0, Opts.getOption(options::OPT_stdlib_EQ), "libc++");
// Validate the C++ standard library choice.
CXXStdlibType Type = GetCXXStdlibType(*DAL);
if (Type == ToolChain::CST_Libcxx) {
// Check whether the target provides libc++.
StringRef where;
// Complain about targeting iOS < 5.0 in any way.
if (isTargetIPhoneOS() && isIPhoneOSVersionLT(5, 0))
where = "iOS 5.0";
if (where != StringRef()) {
getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment)
<< where;
}
}
return DAL;
}
bool Darwin::IsUnwindTablesDefault() const {
return getArch() == llvm::Triple::x86_64;
}
bool Darwin::UseDwarfDebugFlags() const {
if (const char *S = ::getenv("RC_DEBUG_OPTIONS"))
return S[0] != '\0';
return false;
}
bool Darwin::UseSjLjExceptions() const {
// Darwin uses SjLj exceptions on ARM.
return (getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::thumb);
}
bool Darwin::isPICDefault() const {
return true;
}
bool Darwin::isPIEDefault() const {
return false;
}
bool Darwin::isPICDefaultForced() const {
return getArch() == llvm::Triple::x86_64;
}
bool Darwin::SupportsProfiling() const {
// Profiling instrumentation is only supported on x86.
return getArch() == llvm::Triple::x86 || getArch() == llvm::Triple::x86_64;
}
bool Darwin::SupportsObjCGC() const {
// Garbage collection is supported everywhere except on iPhone OS.
return !isTargetIPhoneOS();
}
void Darwin::CheckObjCARC() const {
if (isTargetIPhoneOS() || !isMacosxVersionLT(10, 6))
return;
getDriver().Diag(diag::err_arc_unsupported_on_toolchain);
}
std::string
Darwin_Generic_GCC::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
return ComputeLLVMTriple(Args, InputType);
}
/// Generic_GCC - A tool chain using the 'gcc' command to perform
/// all subcommands; this relies on gcc translating the majority of
/// command line options.
/// \brief Parse a GCCVersion object out of a string of text.
///
/// This is the primary means of forming GCCVersion objects.
/*static*/
Generic_GCC::GCCVersion Linux::GCCVersion::Parse(StringRef VersionText) {
const GCCVersion BadVersion = { VersionText.str(), -1, -1, -1, "", "", "" };
std::pair<StringRef, StringRef> First = VersionText.split('.');
std::pair<StringRef, StringRef> Second = First.second.split('.');
GCCVersion GoodVersion = { VersionText.str(), -1, -1, -1, "", "", "" };
if (First.first.getAsInteger(10, GoodVersion.Major) ||
GoodVersion.Major < 0)
return BadVersion;
GoodVersion.MajorStr = First.first.str();
if (Second.first.getAsInteger(10, GoodVersion.Minor) ||
GoodVersion.Minor < 0)
return BadVersion;
GoodVersion.MinorStr = Second.first.str();
// First look for a number prefix and parse that if present. Otherwise just
// stash the entire patch string in the suffix, and leave the number
// unspecified. This covers version strings such as:
// 4.4
// 4.4.0
// 4.4.x
// 4.4.2-rc4
// 4.4.x-patched
// And retains any patch number it finds.
StringRef PatchText = GoodVersion.PatchSuffix = Second.second.str();
if (!PatchText.empty()) {
if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) {
// Try to parse the number and any suffix.
if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) ||
GoodVersion.Patch < 0)
return BadVersion;
GoodVersion.PatchSuffix = PatchText.substr(EndNumber);
}
}
return GoodVersion;
}
/// \brief Less-than for GCCVersion, implementing a Strict Weak Ordering.
bool Generic_GCC::GCCVersion::isOlderThan(int RHSMajor, int RHSMinor,
int RHSPatch,
StringRef RHSPatchSuffix) const {
if (Major != RHSMajor)
return Major < RHSMajor;
if (Minor != RHSMinor)
return Minor < RHSMinor;
if (Patch != RHSPatch) {
// Note that versions without a specified patch sort higher than those with
// a patch.
if (RHSPatch == -1)
return true;
if (Patch == -1)
return false;
// Otherwise just sort on the patch itself.
return Patch < RHSPatch;
}
if (PatchSuffix != RHSPatchSuffix) {
// Sort empty suffixes higher.
if (RHSPatchSuffix.empty())
return true;
if (PatchSuffix.empty())
return false;
// Provide a lexicographic sort to make this a total ordering.
return PatchSuffix < RHSPatchSuffix;
}
// The versions are equal.
return false;
}
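A minimal replica of the comparison rules, runnable in isolation (a sketch; the real GCCVersion carries more fields, such as the raw text and MajorStr/MinorStr):

#include <cassert>
#include <string>

struct V { int Major, Minor, Patch; std::string Suffix; };

// Same rules as GCCVersion::isOlderThan above.
static bool isOlderThan(const V &L, const V &R) {
  if (L.Major != R.Major) return L.Major < R.Major;
  if (L.Minor != R.Minor) return L.Minor < R.Minor;
  if (L.Patch != R.Patch) {
    if (R.Patch == -1) return true;     // "4.4" outranks "4.4.0"
    if (L.Patch == -1) return false;
    return L.Patch < R.Patch;
  }
  if (L.Suffix != R.Suffix) {
    if (R.Suffix.empty()) return true;  // "4.4.2" outranks "4.4.2-rc4"
    if (L.Suffix.empty()) return false;
    return L.Suffix < R.Suffix;         // lexicographic, for a total order
  }
  return false;
}

int main() {
  assert(isOlderThan({4, 4, 0, ""}, {4, 4, -1, ""}));    // 4.4.0 < 4.4
  assert(isOlderThan({4, 4, 2, "-rc4"}, {4, 4, 2, ""})); // rc4 < release
  assert(!isOlderThan({4, 7, -1, ""}, {4, 6, 1, ""}));   // 4.7 is newer
}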
static StringRef getGCCToolchainDir(const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_gcc_toolchain);
if (A)
return A->getValue();
return GCC_INSTALL_PREFIX;
}
/// \brief Initialize a GCCInstallationDetector from the driver.
///
/// This performs all of the autodetection and sets up the various paths.
/// Once constructed, a GCCInstallationDetector is essentially immutable.
///
/// FIXME: We shouldn't need an explicit TargetTriple parameter here, and
/// should instead pull the target out of the driver. This is currently
/// necessary because the driver doesn't store the final version of the target
/// triple.
void
Generic_GCC::GCCInstallationDetector::init(
const llvm::Triple &TargetTriple, const ArgList &Args) {
llvm::Triple BiarchVariantTriple =
TargetTriple.isArch32Bit() ? TargetTriple.get64BitArchVariant()
: TargetTriple.get32BitArchVariant();
llvm::Triple::ArchType TargetArch = TargetTriple.getArch();
// The library directories which may contain GCC installations.
SmallVector<StringRef, 4> CandidateLibDirs, CandidateBiarchLibDirs;
// The compatible GCC triples for this particular architecture.
SmallVector<StringRef, 10> CandidateTripleAliases;
SmallVector<StringRef, 10> CandidateBiarchTripleAliases;
CollectLibDirsAndTriples(TargetTriple, BiarchVariantTriple, CandidateLibDirs,
CandidateTripleAliases, CandidateBiarchLibDirs,
CandidateBiarchTripleAliases);
// Compute the set of prefixes for our search.
SmallVector<std::string, 8> Prefixes(D.PrefixDirs.begin(),
D.PrefixDirs.end());
StringRef GCCToolchainDir = getGCCToolchainDir(Args);
if (GCCToolchainDir != "") {
if (GCCToolchainDir.back() == '/')
GCCToolchainDir = GCCToolchainDir.drop_back(); // remove the /
Prefixes.push_back(GCCToolchainDir);
} else {
// If we have a SysRoot, try that first.
if (!D.SysRoot.empty()) {
Prefixes.push_back(D.SysRoot);
Prefixes.push_back(D.SysRoot + "/usr");
}
// Then look for gcc installed alongside clang.
Prefixes.push_back(D.InstalledDir + "/..");
// And finally in /usr.
if (D.SysRoot.empty())
Prefixes.push_back("/usr");
}
// Loop over the various components which exist and select the best GCC
// installation available. GCC installs are ranked by version number.
Version = GCCVersion::Parse("0.0.0");
for (unsigned i = 0, ie = Prefixes.size(); i < ie; ++i) {
if (!llvm::sys::fs::exists(Prefixes[i]))
continue;
for (unsigned j = 0, je = CandidateLibDirs.size(); j < je; ++j) {
const std::string LibDir = Prefixes[i] + CandidateLibDirs[j].str();
if (!llvm::sys::fs::exists(LibDir))
continue;
for (unsigned k = 0, ke = CandidateTripleAliases.size(); k < ke; ++k)
ScanLibDirForGCCTriple(TargetArch, Args, LibDir,
CandidateTripleAliases[k]);
}
for (unsigned j = 0, je = CandidateBiarchLibDirs.size(); j < je; ++j) {
const std::string LibDir = Prefixes[i] + CandidateBiarchLibDirs[j].str();
if (!llvm::sys::fs::exists(LibDir))
continue;
for (unsigned k = 0, ke = CandidateBiarchTripleAliases.size(); k < ke;
++k)
ScanLibDirForGCCTriple(TargetArch, Args, LibDir,
CandidateBiarchTripleAliases[k],
/*NeedsBiarchSuffix=*/ true);
}
}
}
void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const {
for (std::set<std::string>::const_iterator
I = CandidateGCCInstallPaths.begin(),
E = CandidateGCCInstallPaths.end();
I != E; ++I)
OS << "Found candidate GCC installation: " << *I << "\n";
OS << "Selected GCC installation: " << GCCInstallPath << "\n";
}
/*static*/ void Generic_GCC::GCCInstallationDetector::CollectLibDirsAndTriples(
const llvm::Triple &TargetTriple, const llvm::Triple &BiarchTriple,
SmallVectorImpl<StringRef> &LibDirs,
SmallVectorImpl<StringRef> &TripleAliases,
SmallVectorImpl<StringRef> &BiarchLibDirs,
SmallVectorImpl<StringRef> &BiarchTripleAliases) {
// Declare a bunch of static data sets that we'll select between below. These
// are specifically designed to always refer to string literals to avoid any
// lifetime or initialization issues.
static const char *const AArch64LibDirs[] = { "/lib" };
static const char *const AArch64Triples[] = { "aarch64-none-linux-gnu",
"aarch64-linux-gnu" };
static const char *const ARMLibDirs[] = { "/lib" };
static const char *const ARMTriples[] = { "arm-linux-gnueabi",
"arm-linux-androideabi" };
static const char *const ARMHFTriples[] = { "arm-linux-gnueabihf",
"armv7hl-redhat-linux-gnueabi" };
static const char *const X86_64LibDirs[] = { "/lib64", "/lib" };
static const char *const X86_64Triples[] = {
"x86_64-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-pc-linux-gnu",
"x86_64-redhat-linux6E", "x86_64-redhat-linux", "x86_64-suse-linux",
"x86_64-manbo-linux-gnu", "x86_64-linux-gnu", "x86_64-slackware-linux"
};
static const char *const X86LibDirs[] = { "/lib32", "/lib" };
static const char *const X86Triples[] = {
"i686-linux-gnu", "i686-pc-linux-gnu", "i486-linux-gnu", "i386-linux-gnu",
"i386-redhat-linux6E", "i686-redhat-linux", "i586-redhat-linux",
"i386-redhat-linux", "i586-suse-linux", "i486-slackware-linux",
"i686-montavista-linux"
};
static const char *const MIPSLibDirs[] = { "/lib" };
static const char *const MIPSTriples[] = { "mips-linux-gnu",
"mips-mti-linux-gnu" };
static const char *const MIPSELLibDirs[] = { "/lib" };
static const char *const MIPSELTriples[] = { "mipsel-linux-gnu",
"mipsel-linux-android" };
static const char *const MIPS64LibDirs[] = { "/lib64", "/lib" };
static const char *const MIPS64Triples[] = { "mips64-linux-gnu",
"mips-mti-linux-gnu" };
static const char *const MIPS64ELLibDirs[] = { "/lib64", "/lib" };
static const char *const MIPS64ELTriples[] = { "mips64el-linux-gnu",
"mips-mti-linux-gnu" };
static const char *const PPCLibDirs[] = { "/lib32", "/lib" };
static const char *const PPCTriples[] = {
"powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe",
"powerpc-suse-linux", "powerpc-montavista-linuxspe"
};
static const char *const PPC64LibDirs[] = { "/lib64", "/lib" };
static const char *const PPC64Triples[] = { "powerpc64-linux-gnu",
"powerpc64-unknown-linux-gnu",
"powerpc64-suse-linux",
"ppc64-redhat-linux" };
static const char *const PPC64LELibDirs[] = { "/lib64", "/lib" };
static const char *const PPC64LETriples[] = { "powerpc64le-linux-gnu",
"powerpc64le-unknown-linux-gnu",
"powerpc64le-suse-linux",
"ppc64le-redhat-linux" };
static const char *const SPARCv8LibDirs[] = { "/lib32", "/lib" };
static const char *const SPARCv8Triples[] = { "sparc-linux-gnu",
"sparcv8-linux-gnu" };
static const char *const SPARCv9LibDirs[] = { "/lib64", "/lib" };
static const char *const SPARCv9Triples[] = { "sparc64-linux-gnu",
"sparcv9-linux-gnu" };
static const char *const SystemZLibDirs[] = { "/lib64", "/lib" };
static const char *const SystemZTriples[] = {
"s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu",
"s390x-suse-linux", "s390x-redhat-linux"
};
switch (TargetTriple.getArch()) {
case llvm::Triple::aarch64:
LibDirs.append(AArch64LibDirs,
AArch64LibDirs + llvm::array_lengthof(AArch64LibDirs));
TripleAliases.append(AArch64Triples,
AArch64Triples + llvm::array_lengthof(AArch64Triples));
BiarchLibDirs.append(AArch64LibDirs,
AArch64LibDirs + llvm::array_lengthof(AArch64LibDirs));
BiarchTripleAliases.append(
AArch64Triples, AArch64Triples + llvm::array_lengthof(AArch64Triples));
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
LibDirs.append(ARMLibDirs, ARMLibDirs + llvm::array_lengthof(ARMLibDirs));
if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
TripleAliases.append(ARMHFTriples,
ARMHFTriples + llvm::array_lengthof(ARMHFTriples));
} else {
TripleAliases.append(ARMTriples,
ARMTriples + llvm::array_lengthof(ARMTriples));
}
break;
case llvm::Triple::x86_64:
LibDirs.append(X86_64LibDirs,
X86_64LibDirs + llvm::array_lengthof(X86_64LibDirs));
TripleAliases.append(X86_64Triples,
X86_64Triples + llvm::array_lengthof(X86_64Triples));
BiarchLibDirs.append(X86LibDirs,
X86LibDirs + llvm::array_lengthof(X86LibDirs));
BiarchTripleAliases.append(X86Triples,
X86Triples + llvm::array_lengthof(X86Triples));
break;
case llvm::Triple::x86:
LibDirs.append(X86LibDirs, X86LibDirs + llvm::array_lengthof(X86LibDirs));
TripleAliases.append(X86Triples,
X86Triples + llvm::array_lengthof(X86Triples));
BiarchLibDirs.append(X86_64LibDirs,
X86_64LibDirs + llvm::array_lengthof(X86_64LibDirs));
BiarchTripleAliases.append(
X86_64Triples, X86_64Triples + llvm::array_lengthof(X86_64Triples));
break;
case llvm::Triple::mips:
LibDirs.append(MIPSLibDirs,
MIPSLibDirs + llvm::array_lengthof(MIPSLibDirs));
TripleAliases.append(MIPSTriples,
MIPSTriples + llvm::array_lengthof(MIPSTriples));
BiarchLibDirs.append(MIPS64LibDirs,
MIPS64LibDirs + llvm::array_lengthof(MIPS64LibDirs));
BiarchTripleAliases.append(
MIPS64Triples, MIPS64Triples + llvm::array_lengthof(MIPS64Triples));
break;
case llvm::Triple::mipsel:
LibDirs.append(MIPSELLibDirs,
MIPSELLibDirs + llvm::array_lengthof(MIPSELLibDirs));
TripleAliases.append(MIPSELTriples,
MIPSELTriples + llvm::array_lengthof(MIPSELTriples));
TripleAliases.append(MIPSTriples,
MIPSTriples + llvm::array_lengthof(MIPSTriples));
BiarchLibDirs.append(
MIPS64ELLibDirs,
MIPS64ELLibDirs + llvm::array_lengthof(MIPS64ELLibDirs));
BiarchTripleAliases.append(
MIPS64ELTriples,
MIPS64ELTriples + llvm::array_lengthof(MIPS64ELTriples));
break;
case llvm::Triple::mips64:
LibDirs.append(MIPS64LibDirs,
MIPS64LibDirs + llvm::array_lengthof(MIPS64LibDirs));
TripleAliases.append(MIPS64Triples,
MIPS64Triples + llvm::array_lengthof(MIPS64Triples));
BiarchLibDirs.append(MIPSLibDirs,
MIPSLibDirs + llvm::array_lengthof(MIPSLibDirs));
BiarchTripleAliases.append(MIPSTriples,
MIPSTriples + llvm::array_lengthof(MIPSTriples));
break;
case llvm::Triple::mips64el:
LibDirs.append(MIPS64ELLibDirs,
MIPS64ELLibDirs + llvm::array_lengthof(MIPS64ELLibDirs));
TripleAliases.append(
MIPS64ELTriples,
MIPS64ELTriples + llvm::array_lengthof(MIPS64ELTriples));
BiarchLibDirs.append(MIPSELLibDirs,
MIPSELLibDirs + llvm::array_lengthof(MIPSELLibDirs));
BiarchTripleAliases.append(
MIPSELTriples, MIPSELTriples + llvm::array_lengthof(MIPSELTriples));
BiarchTripleAliases.append(
MIPSTriples, MIPSTriples + llvm::array_lengthof(MIPSTriples));
break;
case llvm::Triple::ppc:
LibDirs.append(PPCLibDirs, PPCLibDirs + llvm::array_lengthof(PPCLibDirs));
TripleAliases.append(PPCTriples,
PPCTriples + llvm::array_lengthof(PPCTriples));
BiarchLibDirs.append(PPC64LibDirs,
PPC64LibDirs + llvm::array_lengthof(PPC64LibDirs));
BiarchTripleAliases.append(
PPC64Triples, PPC64Triples + llvm::array_lengthof(PPC64Triples));
break;
case llvm::Triple::ppc64:
LibDirs.append(PPC64LibDirs,
PPC64LibDirs + llvm::array_lengthof(PPC64LibDirs));
TripleAliases.append(PPC64Triples,
PPC64Triples + llvm::array_lengthof(PPC64Triples));
BiarchLibDirs.append(PPCLibDirs,
PPCLibDirs + llvm::array_lengthof(PPCLibDirs));
BiarchTripleAliases.append(PPCTriples,
PPCTriples + llvm::array_lengthof(PPCTriples));
break;
case llvm::Triple::ppc64le:
LibDirs.append(PPC64LELibDirs,
PPC64LELibDirs + llvm::array_lengthof(PPC64LELibDirs));
TripleAliases.append(PPC64LETriples,
PPC64LETriples + llvm::array_lengthof(PPC64LETriples));
break;
case llvm::Triple::sparc:
LibDirs.append(SPARCv8LibDirs,
SPARCv8LibDirs + llvm::array_lengthof(SPARCv8LibDirs));
TripleAliases.append(SPARCv8Triples,
SPARCv8Triples + llvm::array_lengthof(SPARCv8Triples));
BiarchLibDirs.append(SPARCv9LibDirs,
SPARCv9LibDirs + llvm::array_lengthof(SPARCv9LibDirs));
BiarchTripleAliases.append(
SPARCv9Triples, SPARCv9Triples + llvm::array_lengthof(SPARCv9Triples));
break;
case llvm::Triple::sparcv9:
LibDirs.append(SPARCv9LibDirs,
SPARCv9LibDirs + llvm::array_lengthof(SPARCv9LibDirs));
TripleAliases.append(SPARCv9Triples,
SPARCv9Triples + llvm::array_lengthof(SPARCv9Triples));
BiarchLibDirs.append(SPARCv8LibDirs,
SPARCv8LibDirs + llvm::array_lengthof(SPARCv8LibDirs));
BiarchTripleAliases.append(
SPARCv8Triples, SPARCv8Triples + llvm::array_lengthof(SPARCv8Triples));
break;
case llvm::Triple::systemz:
LibDirs.append(SystemZLibDirs,
SystemZLibDirs + llvm::array_lengthof(SystemZLibDirs));
TripleAliases.append(SystemZTriples,
SystemZTriples + llvm::array_lengthof(SystemZTriples));
break;
default:
// By default, just rely on the standard lib directories and the original
// triple.
break;
}
// Always append the driver's target triple to the end, in case it doesn't
// match any of our aliases.
TripleAliases.push_back(TargetTriple.str());
// Also include the multiarch variant if it's different.
if (TargetTriple.str() != BiarchTriple.str())
BiarchTripleAliases.push_back(BiarchTriple.str());
}
static bool isSoftFloatABI(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_msoft_float,
options::OPT_mhard_float,
options::OPT_mfloat_abi_EQ);
if (!A) return false;
return A->getOption().matches(options::OPT_msoft_float) ||
(A->getOption().matches(options::OPT_mfloat_abi_EQ) &&
A->getValue() == StringRef("soft"));
}
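// For illustration, the last of these options on the command line wins:
//   '-msoft-float'               -> soft-float ABI
//   '-msoft-float -mhard-float'  -> hard-float ABI
//   '-mfloat-abi=soft'           -> soft-float ABI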
static bool isMipsArch(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::mips ||
Arch == llvm::Triple::mipsel ||
Arch == llvm::Triple::mips64 ||
Arch == llvm::Triple::mips64el;
}
static bool isMips16(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mips16,
options::OPT_mno_mips16);
return A && A->getOption().matches(options::OPT_mips16);
}
static bool isMips32r2(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_march_EQ,
options::OPT_mcpu_EQ);
return A && A->getValue() == StringRef("mips32r2");
}
static bool isMips64r2(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_march_EQ,
options::OPT_mcpu_EQ);
return A && A->getValue() == StringRef("mips64r2");
}
static bool isMicroMips(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mmicromips,
options::OPT_mno_micromips);
return A && A->getOption().matches(options::OPT_mmicromips);
}
static bool isMipsFP64(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mfp64, options::OPT_mfp32);
return A && A->getOption().matches(options::OPT_mfp64);
}
static bool isMipsNan2008(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mnan_EQ);
return A && A->getValue() == StringRef("2008");
}
// FIXME: There is the same routine in the Tools.cpp.
static bool hasMipsN32ABIArg(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
return A && (A->getValue() == StringRef("n32"));
}
static bool hasCrtBeginObj(Twine Path) {
return llvm::sys::fs::exists(Path + "/crtbegin.o");
}
static bool findTargetBiarchSuffix(std::string &Suffix, StringRef Path,
llvm::Triple::ArchType TargetArch,
const ArgList &Args) {
// FIXME: This routine was only intended to model bi-arch toolchains which
// use -m32 and -m64 to swap between variants of a target. It shouldn't be
// doing ABI-based builtin location for MIPS.
if (hasMipsN32ABIArg(Args))
Suffix = "/n32";
else if (TargetArch == llvm::Triple::x86_64 ||
TargetArch == llvm::Triple::ppc64 ||
TargetArch == llvm::Triple::sparcv9 ||
TargetArch == llvm::Triple::systemz ||
TargetArch == llvm::Triple::mips64 ||
TargetArch == llvm::Triple::mips64el)
Suffix = "/64";
else
Suffix = "/32";
return hasCrtBeginObj(Path + Suffix);
}
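// For example, under this scheme an x86_64 target probes <Path>/64 for
// crtbegin.o, an i386 target probes <Path>/32, and a MIPS build with
// '-mabi=n32' probes <Path>/n32.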
void Generic_GCC::GCCInstallationDetector::findMIPSABIDirSuffix(
std::string &Suffix, llvm::Triple::ArchType TargetArch, StringRef Path,
const llvm::opt::ArgList &Args) {
if (!isMipsArch(TargetArch))
return;
// Some MIPS toolchains put libraries and object files compiled
// with different options into subdirectories whose names reflect the
// flags used for compilation. For example, a sysroot directory
// might look like the following:
//
// /usr
// /lib <= crt*.o files compiled with '-mips32'
// /mips16
// /usr
// /lib <= crt*.o files compiled with '-mips16'
// /el
// /usr
// /lib <= crt*.o files compiled with '-mips16 -EL'
//
// or
//
// /usr
// /lib <= crt*.o files compiled with '-mips32r2'
// /mips16
// /usr
// /lib <= crt*.o files compiled with '-mips32r2 -mips16'
// /mips32
// /usr
// /lib <= crt*.o files compiled with '-mips32'
//
// Unfortunately, different toolchains use different and partially
// overlapping naming schemes. So we have to resort to a trick to detect
// which toolchain is in use: we look up a path that is unique to each one.
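// As an illustration of the rules below: with an FSF-style toolchain, a
// mipsel target built with '-mips32r2 -mips16 -msoft-float' composes the
// suffix '/mips16/el/sof'; the suffix is discarded again if no crtbegin.o
// is found underneath it.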
bool IsMentorToolChain = hasCrtBeginObj(Path + "/mips16/soft-float");
bool IsFSFToolChain = hasCrtBeginObj(Path + "/mips32/mips16/sof");
if (IsMentorToolChain && IsFSFToolChain)
D.Diag(diag::err_drv_unknown_toolchain);
if (IsMentorToolChain) {
if (isMips16(Args))
Suffix += "/mips16";
else if (isMicroMips(Args))
Suffix += "/micromips";
if (isSoftFloatABI(Args))
Suffix += "/soft-float";
if (TargetArch == llvm::Triple::mipsel ||
TargetArch == llvm::Triple::mips64el)
Suffix += "/el";
} else if (IsFSFToolChain) {
if (TargetArch == llvm::Triple::mips ||
TargetArch == llvm::Triple::mipsel) {
if (isMicroMips(Args))
Suffix += "/micromips";
else if (isMips32r2(Args))
Suffix += "";
else
Suffix += "/mips32";
if (isMips16(Args))
Suffix += "/mips16";
} else {
if (isMips64r2(Args))
Suffix += hasMipsN32ABIArg(Args) ? "/mips64r2" : "/mips64r2/64";
else
Suffix += hasMipsN32ABIArg(Args) ? "/mips64" : "/mips64/64";
}
if (TargetArch == llvm::Triple::mipsel ||
TargetArch == llvm::Triple::mips64el)
Suffix += "/el";
if (isSoftFloatABI(Args))
Suffix += "/sof";
else {
if (isMipsFP64(Args))
Suffix += "/fp64";
if (isMipsNan2008(Args))
Suffix += "/nan2008";
}
}
if (!hasCrtBeginObj(Path + Suffix))
Suffix.clear();
}
void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
llvm::Triple::ArchType TargetArch, const ArgList &Args,
const std::string &LibDir, StringRef CandidateTriple,
bool NeedsBiarchSuffix) {
// There are various suffixes involving the triple we check for. We also
// record what is necessary to walk from each back up to the lib directory.
const std::string LibSuffixes[] = {
"/gcc/" + CandidateTriple.str(),
// Debian puts cross-compilers in gcc-cross
"/gcc-cross/" + CandidateTriple.str(),
"/" + CandidateTriple.str() + "/gcc/" + CandidateTriple.str(),
// The Freescale PPC SDK has the gcc libraries in
// <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well.
"/" + CandidateTriple.str(),
// Ubuntu has a strange mis-matched pair of triples that this happens to
// match.
// FIXME: It may be worthwhile to generalize this and look for a second
// triple.
"/i386-linux-gnu/gcc/" + CandidateTriple.str()
};
const std::string InstallSuffixes[] = {
"/../../..", // gcc/
"/../../..", // gcc-cross/
"/../../../..", // <triple>/gcc/
"/../..", // <triple>/
"/../../../.." // i386-linux-gnu/gcc/<triple>/
};
// Only look at the final, weird Ubuntu suffix when targeting x86.
const unsigned NumLibSuffixes =
(llvm::array_lengthof(LibSuffixes) - (TargetArch != llvm::Triple::x86));
for (unsigned i = 0; i < NumLibSuffixes; ++i) {
StringRef LibSuffix = LibSuffixes[i];
llvm::error_code EC;
for (llvm::sys::fs::directory_iterator LI(LibDir + LibSuffix, EC), LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef VersionText = llvm::sys::path::filename(LI->path());
GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
if (CandidateVersion.Major != -1) // Filter obviously bad entries.
if (!CandidateGCCInstallPaths.insert(LI->path()).second)
continue; // Saw this path before; no need to look at it again.
if (CandidateVersion.isOlderThan(4, 1, 1))
continue;
if (CandidateVersion <= Version)
continue;
std::string MIPSABIDirSuffix;
findMIPSABIDirSuffix(MIPSABIDirSuffix, TargetArch, LI->path(), Args);
// Some versions of SUSE and Fedora on ppc64 put 32-bit libs
// in what would normally be GCCInstallPath and put the 64-bit
// libs in a subdirectory named 64. The simple logic we follow is that
// *if* there is a subdirectory of the right name with crtbegin.o in it,
// we use that. If not, and if not a biarch triple alias, we look for
// crtbegin.o without the subdirectory.
std::string BiarchSuffix;
if (findTargetBiarchSuffix(BiarchSuffix,
LI->path() + MIPSABIDirSuffix,
TargetArch, Args)) {
GCCBiarchSuffix = BiarchSuffix;
} else if (NeedsBiarchSuffix ||
!hasCrtBeginObj(LI->path() + MIPSABIDirSuffix)) {
continue;
} else {
GCCBiarchSuffix.clear();
}
Version = CandidateVersion;
GCCTriple.setTriple(CandidateTriple);
// FIXME: We hack together the directory name here instead of
// using LI to ensure stable path separators across Windows and
// Linux.
GCCInstallPath = LibDir + LibSuffixes[i] + "/" + VersionText.str();
GCCParentLibPath = GCCInstallPath + InstallSuffixes[i];
GCCMIPSABIDirSuffix = MIPSABIDirSuffix;
IsValid = true;
}
}
}
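// For illustration: given LibDir "/usr/lib" and CandidateTriple
// "x86_64-linux-gnu", this scans version directories such as
// /usr/lib/gcc/x86_64-linux-gnu/4.8, skipping anything older than GCC
// 4.1.1 or older than a previously accepted candidate, and records the
// newest surviving installation.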
Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple& Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), GCCInstallation(getDriver()) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
}
Generic_GCC::~Generic_GCC() {
}
Tool *Generic_GCC::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::PreprocessJobClass:
if (!Preprocess)
Preprocess.reset(new tools::gcc::Preprocess(*this));
return Preprocess.get();
case Action::PrecompileJobClass:
if (!Precompile)
Precompile.reset(new tools::gcc::Precompile(*this));
return Precompile.get();
case Action::CompileJobClass:
if (!Compile)
Compile.reset(new tools::gcc::Compile(*this));
return Compile.get();
default:
return ToolChain::getTool(AC);
}
}
Tool *Generic_GCC::buildAssembler() const {
return new tools::gcc::Assemble(*this);
}
Tool *Generic_GCC::buildLinker() const {
return new tools::gcc::Link(*this);
}
void Generic_GCC::printVerboseInfo(raw_ostream &OS) const {
// Print the information about how we detected the GCC installation.
GCCInstallation.print(OS);
}
bool Generic_GCC::IsUnwindTablesDefault() const {
return getArch() == llvm::Triple::x86_64;
}
bool Generic_GCC::isPICDefault() const {
return false;
}
bool Generic_GCC::isPIEDefault() const {
return false;
}
bool Generic_GCC::isPICDefaultForced() const {
return false;
}
+void Generic_GCC::addClangTargetOptions(const ArgList &DriverArgs,
+ ArgStringList &CC1Args) const {
+ const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
+ bool UseInitArrayDefault =
+ getTriple().getArch() == llvm::Triple::aarch64 ||
+ (getTriple().getOS() == llvm::Triple::Linux && (
+ !V.isOlderThan(4, 7, 0) ||
+ getTriple().getEnvironment() == llvm::Triple::Android));
+
+ if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
+ options::OPT_fno_use_init_array,
+ UseInitArrayDefault))
+ CC1Args.push_back("-fuse-init-array");
+}
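+// Illustrative defaults under the logic above: any AArch64 target, or a
+// Linux target with either a detected GCC >= 4.7 or an Android
+// environment, gets '-fuse-init-array' passed to cc1 unless
+// '-fno-use-init-array' is given explicitly.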
+
/// Hexagon Toolchain
std::string Hexagon_TC::GetGnuDir(const std::string &InstalledDir) {
// Locate the rest of the toolchain ...
if (strlen(GCC_INSTALL_PREFIX))
return std::string(GCC_INSTALL_PREFIX);
std::string InstallRelDir = InstalledDir + "/../../gnu";
if (llvm::sys::fs::exists(InstallRelDir))
return InstallRelDir;
std::string PrefixRelDir = std::string(LLVM_PREFIX) + "/../gnu";
if (llvm::sys::fs::exists(PrefixRelDir))
return PrefixRelDir;
return InstallRelDir;
}
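// Resolution order, for illustration: a configure-time GCC_INSTALL_PREFIX
// wins outright; otherwise <InstalledDir>/../../gnu is used if it exists,
// then <LLVM_PREFIX>/../gnu, falling back to the first relative path.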
static void GetHexagonLibraryPaths(
const ArgList &Args,
const std::string Ver,
const std::string MarchString,
const std::string &InstalledDir,
ToolChain::path_list *LibPaths)
{
bool buildingLib = Args.hasArg(options::OPT_shared);
//----------------------------------------------------------------------------
// -L Args
//----------------------------------------------------------------------------
for (arg_iterator
it = Args.filtered_begin(options::OPT_L),
ie = Args.filtered_end();
it != ie;
++it) {
for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i)
LibPaths->push_back((*it)->getValue(i));
}
//----------------------------------------------------------------------------
// Other standard paths
//----------------------------------------------------------------------------
const std::string MarchSuffix = "/" + MarchString;
const std::string G0Suffix = "/G0";
const std::string MarchG0Suffix = MarchSuffix + G0Suffix;
const std::string RootDir = Hexagon_TC::GetGnuDir(InstalledDir) + "/";
// lib/gcc/hexagon/...
std::string LibGCCHexagonDir = RootDir + "lib/gcc/hexagon/";
if (buildingLib) {
LibPaths->push_back(LibGCCHexagonDir + Ver + MarchG0Suffix);
LibPaths->push_back(LibGCCHexagonDir + Ver + G0Suffix);
}
LibPaths->push_back(LibGCCHexagonDir + Ver + MarchSuffix);
LibPaths->push_back(LibGCCHexagonDir + Ver);
// lib/gcc/...
LibPaths->push_back(RootDir + "lib/gcc");
// hexagon/lib/...
std::string HexagonLibDir = RootDir + "hexagon/lib";
if (buildingLib) {
LibPaths->push_back(HexagonLibDir + MarchG0Suffix);
LibPaths->push_back(HexagonLibDir + G0Suffix);
}
LibPaths->push_back(HexagonLibDir + MarchSuffix);
LibPaths->push_back(HexagonLibDir);
}
Hexagon_TC::Hexagon_TC(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Linux(D, Triple, Args) {
const std::string InstalledDir(getDriver().getInstalledDir());
const std::string GnuDir = Hexagon_TC::GetGnuDir(InstalledDir);
// Note: Generic_GCC::Generic_GCC adds InstalledDir and getDriver().Dir to
// program paths
const std::string BinDir(GnuDir + "/bin");
if (llvm::sys::fs::exists(BinDir))
getProgramPaths().push_back(BinDir);
// Determine version of GCC libraries and headers to use.
const std::string HexagonDir(GnuDir + "/lib/gcc/hexagon");
llvm::error_code ec;
GCCVersion MaxVersion = GCCVersion::Parse("0.0.0");
for (llvm::sys::fs::directory_iterator di(HexagonDir, ec), de;
!ec && di != de; di = di.increment(ec)) {
GCCVersion cv = GCCVersion::Parse(llvm::sys::path::filename(di->path()));
if (MaxVersion < cv)
MaxVersion = cv;
}
GCCLibAndIncVersion = MaxVersion;
ToolChain::path_list *LibPaths = &getFilePaths();
// Remove the paths added by the Linux toolchain. Currently Hexagon_TC
// really targets the 'elf' OS type, so the Linux paths are not
// appropriate. When we actually support 'linux' we'll need to fix this up.
LibPaths->clear();
GetHexagonLibraryPaths(
Args,
GetGCCLibAndIncVersion(),
GetTargetCPU(Args),
InstalledDir,
LibPaths);
}
Hexagon_TC::~Hexagon_TC() {
}
Tool *Hexagon_TC::buildAssembler() const {
return new tools::hexagon::Assemble(*this);
}
Tool *Hexagon_TC::buildLinker() const {
return new tools::hexagon::Link(*this);
}
void Hexagon_TC::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
const Driver &D = getDriver();
if (DriverArgs.hasArg(options::OPT_nostdinc) ||
DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
std::string Ver(GetGCCLibAndIncVersion());
std::string GnuDir = Hexagon_TC::GetGnuDir(D.InstalledDir);
std::string HexagonDir(GnuDir + "/lib/gcc/hexagon/" + Ver);
addExternCSystemInclude(DriverArgs, CC1Args, HexagonDir + "/include");
addExternCSystemInclude(DriverArgs, CC1Args, HexagonDir + "/include-fixed");
addExternCSystemInclude(DriverArgs, CC1Args, GnuDir + "/hexagon/include");
}
void Hexagon_TC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
DriverArgs.hasArg(options::OPT_nostdincxx))
return;
const Driver &D = getDriver();
std::string Ver(GetGCCLibAndIncVersion());
SmallString<128> IncludeDir(Hexagon_TC::GetGnuDir(D.InstalledDir));
llvm::sys::path::append(IncludeDir, "hexagon/include/c++/");
llvm::sys::path::append(IncludeDir, Ver);
addSystemInclude(DriverArgs, CC1Args, IncludeDir.str());
}
ToolChain::CXXStdlibType
Hexagon_TC::GetCXXStdlibType(const ArgList &Args) const {
Arg *A = Args.getLastArg(options::OPT_stdlib_EQ);
if (!A)
return ToolChain::CST_Libstdcxx;
StringRef Value = A->getValue();
if (Value != "libstdc++") {
getDriver().Diag(diag::err_drv_invalid_stdlib_name)
<< A->getAsString(Args);
}
return ToolChain::CST_Libstdcxx;
}
static int getHexagonVersion(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ);
// Select the default CPU (v4) if none was given.
if (!A)
return 4;
// FIXME: produce errors if we cannot parse the version.
StringRef WhichHexagon = A->getValue();
if (WhichHexagon.startswith("hexagonv")) {
int Val;
if (!WhichHexagon.substr(sizeof("hexagonv") - 1).getAsInteger(10, Val))
return Val;
}
if (WhichHexagon.startswith("v")) {
int Val;
if (!WhichHexagon.substr(1).getAsInteger(10, Val))
return Val;
}
// FIXME: should probably be an error.
return 4;
}
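// For example, '-mcpu=hexagonv5' and '-march=v5' both yield 5, while a
// missing or unrecognized value currently falls back to 4.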
StringRef Hexagon_TC::GetTargetCPU(const ArgList &Args)
{
int V = getHexagonVersion(Args);
// FIXME: We don't support versions < 4. We should error on them.
switch (V) {
default:
llvm_unreachable("Unexpected version");
case 5:
return "v5";
case 4:
return "v4";
case 3:
return "v3";
case 2:
return "v2";
case 1:
return "v1";
}
}
// End Hexagon
/// TCEToolChain - A tool chain using the llvm bitcode tools to perform
/// all subcommands. See http://tce.cs.tut.fi for our peculiar target.
/// Currently does not support anything else but compilation.
TCEToolChain::TCEToolChain(const Driver &D, const llvm::Triple& Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args) {
// Path mangling to find libexec
std::string Path(getDriver().Dir);
Path += "/../libexec";
getProgramPaths().push_back(Path);
}
TCEToolChain::~TCEToolChain() {
}
bool TCEToolChain::IsMathErrnoDefault() const {
return true;
}
bool TCEToolChain::isPICDefault() const {
return false;
}
bool TCEToolChain::isPIEDefault() const {
return false;
}
bool TCEToolChain::isPICDefaultForced() const {
return false;
}
/// OpenBSD - OpenBSD tool chain which can call as(1) and ld(1) directly.
OpenBSD::OpenBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
}
Tool *OpenBSD::buildAssembler() const {
return new tools::openbsd::Assemble(*this);
}
Tool *OpenBSD::buildLinker() const {
return new tools::openbsd::Link(*this);
}
/// Bitrig - Bitrig tool chain which can call as(1) and ld(1) directly.
Bitrig::Bitrig(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
}
Tool *Bitrig::buildAssembler() const {
return new tools::bitrig::Assemble(*this);
}
Tool *Bitrig::buildLinker() const {
return new tools::bitrig::Link(*this);
}
void Bitrig::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
DriverArgs.hasArg(options::OPT_nostdincxx))
return;
switch (GetCXXStdlibType(DriverArgs)) {
case ToolChain::CST_Libcxx:
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/");
break;
case ToolChain::CST_Libstdcxx:
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/stdc++");
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/stdc++/backward");
StringRef Triple = getTriple().str();
if (Triple.startswith("amd64"))
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/stdc++/x86_64" +
Triple.substr(5));
else
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/stdc++/" +
Triple);
break;
}
}
void Bitrig::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
switch (GetCXXStdlibType(Args)) {
case ToolChain::CST_Libcxx:
CmdArgs.push_back("-lc++");
CmdArgs.push_back("-lcxxrt");
// Include supc++ to provide Unwind until provided by libcxx.
CmdArgs.push_back("-lgcc");
break;
case ToolChain::CST_Libstdcxx:
CmdArgs.push_back("-lstdc++");
break;
}
}
/// FreeBSD - FreeBSD tool chain which can call as(1) and ld(1) directly.
FreeBSD::FreeBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
// When targeting 32-bit platforms, look for '/usr/lib32/crt1.o' and fall
// back to '/usr/lib' if it doesn't exist.
if ((Triple.getArch() == llvm::Triple::x86 ||
Triple.getArch() == llvm::Triple::ppc) &&
llvm::sys::fs::exists(getDriver().SysRoot + "/usr/lib32/crt1.o"))
getFilePaths().push_back(getDriver().SysRoot + "/usr/lib32");
else
getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
}
ToolChain::CXXStdlibType
FreeBSD::GetCXXStdlibType(const ArgList &Args) const {
if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
StringRef Value = A->getValue();
if (Value == "libstdc++")
return ToolChain::CST_Libstdcxx;
if (Value == "libc++")
return ToolChain::CST_Libcxx;
getDriver().Diag(diag::err_drv_invalid_stdlib_name)
<< A->getAsString(Args);
}
if (getTriple().getOSMajorVersion() >= 10)
return ToolChain::CST_Libcxx;
return ToolChain::CST_Libstdcxx;
}
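// For example, absent an explicit '-stdlib=' choice, a freebsd10.0 triple
// selects libc++ while a freebsd9.2 triple keeps libstdc++.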
void FreeBSD::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
DriverArgs.hasArg(options::OPT_nostdincxx))
return;
switch (GetCXXStdlibType(DriverArgs)) {
case ToolChain::CST_Libcxx:
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/v1");
break;
case ToolChain::CST_Libstdcxx:
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/4.2");
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/4.2/backward");
break;
}
}
Tool *FreeBSD::buildAssembler() const {
return new tools::freebsd::Assemble(*this);
}
Tool *FreeBSD::buildLinker() const {
return new tools::freebsd::Link(*this);
}
bool FreeBSD::UseSjLjExceptions() const {
// FreeBSD uses SjLj exceptions on ARM oabi.
switch (getTriple().getEnvironment()) {
case llvm::Triple::GNUEABIHF:
case llvm::Triple::GNUEABI:
case llvm::Triple::EABI:
return false;
default:
return (getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::thumb);
}
}
/// NetBSD - NetBSD tool chain which can call as(1) and ld(1) directly.
NetBSD::NetBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
if (getDriver().UseStdLib) {
// When targeting a 32-bit platform, try the special directory used on
// 64-bit hosts, and only fall back to the main library directory if that
// doesn't work.
// FIXME: It'd be nicer to test if this directory exists, but I'm not sure
// what all logic is needed to emulate the '=' prefix here.
if (Triple.getArch() == llvm::Triple::x86)
getFilePaths().push_back("=/usr/lib/i386");
getFilePaths().push_back("=/usr/lib");
}
}
Tool *NetBSD::buildAssembler() const {
return new tools::netbsd::Assemble(*this);
}
Tool *NetBSD::buildLinker() const {
return new tools::netbsd::Link(*this);
}
ToolChain::CXXStdlibType
NetBSD::GetCXXStdlibType(const ArgList &Args) const {
if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
StringRef Value = A->getValue();
if (Value == "libstdc++")
return ToolChain::CST_Libstdcxx;
if (Value == "libc++")
return ToolChain::CST_Libcxx;
getDriver().Diag(diag::err_drv_invalid_stdlib_name)
<< A->getAsString(Args);
}
unsigned Major, Minor, Micro;
getTriple().getOSVersion(Major, Minor, Micro);
if (Major >= 7 || (Major == 6 && Minor == 99 && Micro >= 23) || Major == 0) {
if (getArch() == llvm::Triple::x86 || getArch() == llvm::Triple::x86_64)
return ToolChain::CST_Libcxx;
}
return ToolChain::CST_Libstdcxx;
}
void NetBSD::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
DriverArgs.hasArg(options::OPT_nostdincxx))
return;
switch (GetCXXStdlibType(DriverArgs)) {
case ToolChain::CST_Libcxx:
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/");
break;
case ToolChain::CST_Libstdcxx:
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/g++");
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/g++/backward");
break;
}
}
/// Minix - Minix tool chain which can call as(1) and ld(1) directly.
Minix::Minix(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
}
Tool *Minix::buildAssembler() const {
return new tools::minix::Assemble(*this);
}
Tool *Minix::buildLinker() const {
return new tools::minix::Link(*this);
}
/// AuroraUX - AuroraUX tool chain which can call as(1) and ld(1) directly.
AuroraUX::AuroraUX(const Driver &D, const llvm::Triple& Triple,
const ArgList &Args)
: Generic_GCC(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
getFilePaths().push_back("/usr/sfw/lib");
getFilePaths().push_back("/opt/gcc4/lib");
getFilePaths().push_back("/opt/gcc4/lib/gcc/i386-pc-solaris2.11/4.2.4");
}
Tool *AuroraUX::buildAssembler() const {
return new tools::auroraux::Assemble(*this);
}
Tool *AuroraUX::buildLinker() const {
return new tools::auroraux::Link(*this);
}
/// Solaris - Solaris tool chain which can call as(1) and ld(1) directly.
Solaris::Solaris(const Driver &D, const llvm::Triple& Triple,
const ArgList &Args)
: Generic_GCC(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
}
Tool *Solaris::buildAssembler() const {
return new tools::solaris::Assemble(*this);
}
Tool *Solaris::buildLinker() const {
return new tools::solaris::Link(*this);
}
/// Distribution (very bare-bones at the moment).
enum Distro {
ArchLinux,
DebianLenny,
DebianSqueeze,
DebianWheezy,
DebianJessie,
Exherbo,
RHEL4,
RHEL5,
RHEL6,
Fedora,
OpenSUSE,
UbuntuHardy,
UbuntuIntrepid,
UbuntuJaunty,
UbuntuKarmic,
UbuntuLucid,
UbuntuMaverick,
UbuntuNatty,
UbuntuOneiric,
UbuntuPrecise,
UbuntuQuantal,
UbuntuRaring,
UbuntuSaucy,
UbuntuTrusty,
UnknownDistro
};
static bool IsRedhat(enum Distro Distro) {
return Distro == Fedora || (Distro >= RHEL4 && Distro <= RHEL6);
}
static bool IsOpenSUSE(enum Distro Distro) {
return Distro == OpenSUSE;
}
static bool IsDebian(enum Distro Distro) {
return Distro >= DebianLenny && Distro <= DebianJessie;
}
static bool IsUbuntu(enum Distro Distro) {
return Distro >= UbuntuHardy && Distro <= UbuntuTrusty;
}
static Distro DetectDistro(llvm::Triple::ArchType Arch) {
OwningPtr<llvm::MemoryBuffer> File;
if (!llvm::MemoryBuffer::getFile("/etc/lsb-release", File)) {
StringRef Data = File.get()->getBuffer();
SmallVector<StringRef, 8> Lines;
Data.split(Lines, "\n");
Distro Version = UnknownDistro;
for (unsigned i = 0, s = Lines.size(); i != s; ++i)
if (Version == UnknownDistro && Lines[i].startswith("DISTRIB_CODENAME="))
Version = llvm::StringSwitch<Distro>(Lines[i].substr(17))
.Case("hardy", UbuntuHardy)
.Case("intrepid", UbuntuIntrepid)
.Case("jaunty", UbuntuJaunty)
.Case("karmic", UbuntuKarmic)
.Case("lucid", UbuntuLucid)
.Case("maverick", UbuntuMaverick)
.Case("natty", UbuntuNatty)
.Case("oneiric", UbuntuOneiric)
.Case("precise", UbuntuPrecise)
.Case("quantal", UbuntuQuantal)
.Case("raring", UbuntuRaring)
.Case("saucy", UbuntuSaucy)
.Case("trusty", UbuntuTrusty)
.Default(UnknownDistro);
return Version;
}
if (!llvm::MemoryBuffer::getFile("/etc/redhat-release", File)) {
StringRef Data = File.get()->getBuffer();
if (Data.startswith("Fedora release"))
return Fedora;
else if (Data.startswith("Red Hat Enterprise Linux") &&
Data.find("release 6") != StringRef::npos)
return RHEL6;
else if ((Data.startswith("Red Hat Enterprise Linux") ||
Data.startswith("CentOS")) &&
Data.find("release 5") != StringRef::npos)
return RHEL5;
else if ((Data.startswith("Red Hat Enterprise Linux") ||
Data.startswith("CentOS")) &&
Data.find("release 4") != StringRef::npos)
return RHEL4;
return UnknownDistro;
}
if (!llvm::MemoryBuffer::getFile("/etc/debian_version", File)) {
StringRef Data = File.get()->getBuffer();
if (Data[0] == '5')
return DebianLenny;
else if (Data.startswith("squeeze/sid") || Data[0] == '6')
return DebianSqueeze;
else if (Data.startswith("wheezy/sid") || Data[0] == '7')
return DebianWheezy;
else if (Data.startswith("jessie/sid") || Data[0] == '8')
return DebianJessie;
return UnknownDistro;
}
if (llvm::sys::fs::exists("/etc/SuSE-release"))
return OpenSUSE;
if (llvm::sys::fs::exists("/etc/exherbo-release"))
return Exherbo;
if (llvm::sys::fs::exists("/etc/arch-release"))
return ArchLinux;
return UnknownDistro;
}
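// For illustration: an /etc/lsb-release containing
// 'DISTRIB_CODENAME=precise' yields UbuntuPrecise, and an
// /etc/debian_version starting with '7' (or 'wheezy/sid') yields
// DebianWheezy.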
/// \brief Get our best guess at the multiarch triple for a target.
///
/// Debian-based systems are starting to use a multiarch setup where they use
/// a target-triple directory in the library and header search paths.
/// Unfortunately, this triple does not align with the vanilla target triple,
/// so we provide a rough mapping here.
static std::string getMultiarchTriple(const llvm::Triple TargetTriple,
StringRef SysRoot) {
// For most architectures, just use whatever we have rather than trying to be
// clever.
switch (TargetTriple.getArch()) {
default:
return TargetTriple.str();
// We use the existence of a '/lib/<triple>' directory to detect some
// common Linux triples that don't quite match the Clang triple for both
// 32-bit and 64-bit targets. Multiarch fixes its install triples to these
// regardless of what the actual target triple is.
case llvm::Triple::arm:
case llvm::Triple::thumb:
if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
if (llvm::sys::fs::exists(SysRoot + "/lib/arm-linux-gnueabihf"))
return "arm-linux-gnueabihf";
} else {
if (llvm::sys::fs::exists(SysRoot + "/lib/arm-linux-gnueabi"))
return "arm-linux-gnueabi";
}
return TargetTriple.str();
case llvm::Triple::x86:
if (llvm::sys::fs::exists(SysRoot + "/lib/i386-linux-gnu"))
return "i386-linux-gnu";
return TargetTriple.str();
case llvm::Triple::x86_64:
if (llvm::sys::fs::exists(SysRoot + "/lib/x86_64-linux-gnu"))
return "x86_64-linux-gnu";
return TargetTriple.str();
case llvm::Triple::aarch64:
if (llvm::sys::fs::exists(SysRoot + "/lib/aarch64-linux-gnu"))
return "aarch64-linux-gnu";
return TargetTriple.str();
case llvm::Triple::mips:
if (llvm::sys::fs::exists(SysRoot + "/lib/mips-linux-gnu"))
return "mips-linux-gnu";
return TargetTriple.str();
case llvm::Triple::mipsel:
if (llvm::sys::fs::exists(SysRoot + "/lib/mipsel-linux-gnu"))
return "mipsel-linux-gnu";
return TargetTriple.str();
case llvm::Triple::ppc:
if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc-linux-gnuspe"))
return "powerpc-linux-gnuspe";
if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc-linux-gnu"))
return "powerpc-linux-gnu";
return TargetTriple.str();
case llvm::Triple::ppc64:
if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc64-linux-gnu"))
return "powerpc64-linux-gnu";
return TargetTriple.str();
case llvm::Triple::ppc64le:
if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc64le-linux-gnu"))
return "powerpc64le-linux-gnu";
return TargetTriple.str();
}
}
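// For example, on a Debian-style x86-64 sysroot providing
// /lib/x86_64-linux-gnu, a target triple of x86_64-pc-linux-gnu maps to
// the multiarch triple x86_64-linux-gnu; if no such directory exists,
// the original triple is returned unchanged.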
static void addPathIfExists(Twine Path, ToolChain::path_list &Paths) {
if (llvm::sys::fs::exists(Path)) Paths.push_back(Path.str());
}
static StringRef getMultilibDir(const llvm::Triple &Triple,
const ArgList &Args) {
if (isMipsArch(Triple.getArch())) {
// The lib32 directory has a special meaning on MIPS targets: it contains
// binaries for the N32 ABI. Use this directory only when producing
// code for the N32 ABI.
if (hasMipsN32ABIArg(Args))
return "lib32";
return Triple.isArch32Bit() ? "lib" : "lib64";
}
// It happens that only x86 and PPC use the 'lib32' variant of multilib, and
// using that variant while targeting other architectures causes problems
// because the libraries are laid out in shared system roots that can't cope
// with a 'lib32' multilib search path being considered. So we only enable
// them when we know we may need it.
//
// FIXME: This is a bit of a hack. We should really unify this code for
// reasoning about multilib spellings with the lib dir spellings in the
// GCCInstallationDetector, but that is a more significant refactoring.
if (Triple.getArch() == llvm::Triple::x86 ||
Triple.getArch() == llvm::Triple::ppc)
return "lib32";
return Triple.isArch32Bit() ? "lib" : "lib64";
}
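// Illustrative results: a MIPS target with '-mabi=n32' yields "lib32",
// other 64-bit MIPS targets yield "lib64", x86 and PPC yield "lib32",
// and remaining targets yield "lib" or "lib64" by word size.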
Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
GCCInstallation.init(Triple, Args);
llvm::Triple::ArchType Arch = Triple.getArch();
std::string SysRoot = computeSysRoot();
// Cross-compiling binutils and GCC installations (vanilla and openSUSE at
// least) put various tools in a triple-prefixed directory off of the parent
// of the GCC installation. We use the GCC triple here to ensure that we end
// up with tools that support the same amount of cross compiling as the
// detected GCC installation. For example, if we find a GCC installation
// targeting x86_64, but it is a bi-arch GCC installation, it can also be
// used to target i386.
// FIXME: This seems unlikely to be Linux-specific.
ToolChain::path_list &PPaths = getProgramPaths();
PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" +
GCCInstallation.getTriple().str() + "/bin").str());
Linker = GetProgramPath("ld");
Distro Distro = DetectDistro(Arch);
if (IsOpenSUSE(Distro) || IsUbuntu(Distro)) {
ExtraOpts.push_back("-z");
ExtraOpts.push_back("relro");
}
if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)
ExtraOpts.push_back("-X");
const bool IsAndroid = Triple.getEnvironment() == llvm::Triple::Android;
const bool IsMips = isMipsArch(Arch);
if (IsMips && !SysRoot.empty())
ExtraOpts.push_back("--sysroot=" + SysRoot);
// Do not use the 'gnu' hash style for MIPS targets because .gnu.hash
// and the MIPS ABI require .dynsym to be sorted in different ways.
// .gnu.hash needs symbols to be grouped by hash code, whereas the MIPS
// ABI requires a mapping between the GOT and the symbol table.
// The Android loader does not support .gnu.hash.
if (!IsMips && !IsAndroid) {
if (IsRedhat(Distro) || IsOpenSUSE(Distro) ||
(IsUbuntu(Distro) && Distro >= UbuntuMaverick))
ExtraOpts.push_back("--hash-style=gnu");
if (IsDebian(Distro) || IsOpenSUSE(Distro) || Distro == UbuntuLucid ||
Distro == UbuntuJaunty || Distro == UbuntuKarmic)
ExtraOpts.push_back("--hash-style=both");
}
if (IsRedhat(Distro))
ExtraOpts.push_back("--no-add-needed");
if (Distro == DebianSqueeze || Distro == DebianWheezy ||
Distro == DebianJessie || IsOpenSUSE(Distro) ||
(IsRedhat(Distro) && Distro != RHEL4 && Distro != RHEL5) ||
(IsUbuntu(Distro) && Distro >= UbuntuKarmic))
ExtraOpts.push_back("--build-id");
if (IsOpenSUSE(Distro))
ExtraOpts.push_back("--enable-new-dtags");
// The selection of paths to try here is designed to match the patterns which
// the GCC driver itself uses, as this is part of the GCC-compatible driver.
// This was determined by running GCC in a fake filesystem, creating all
// possible permutations of these directories, and seeing which ones it added
// to the link paths.
path_list &Paths = getFilePaths();
const std::string Multilib = getMultilibDir(Triple, Args);
const std::string MultiarchTriple = getMultiarchTriple(Triple, SysRoot);
// Add the multilib suffixed paths where they are available.
if (GCCInstallation.isValid()) {
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
const std::string &LibPath = GCCInstallation.getParentLibPath();
// Sourcery CodeBench MIPS toolchain holds some libraries under
// a biarch-like suffix of the GCC installation.
//
// FIXME: It would be cleaner to model this as a variant of bi-arch, i.e.
// instead of a '64' biarch suffix it would be 'el' or something.
if (IsAndroid && IsMips && isMips32r2(Args)) {
assert(GCCInstallation.getBiarchSuffix().empty() &&
"Unexpected bi-arch suffix");
addPathIfExists(GCCInstallation.getInstallPath() + "/mips-r2", Paths);
} else {
addPathIfExists((GCCInstallation.getInstallPath() +
GCCInstallation.getMIPSABIDirSuffix() +
GCCInstallation.getBiarchSuffix()),
Paths);
}
// GCC cross compiling toolchains will install target libraries which ship
// as part of the toolchain under <prefix>/<triple>/<libdir> rather than as
// any part of the GCC installation in
// <prefix>/<libdir>/gcc/<triple>/<version>. This decision is somewhat
// debatable, but is the reality today. We need to search this tree even
// when we have a sysroot somewhere else. It is the responsibility of
// whoever is doing the cross build targeting a sysroot using a GCC
// installation that is *not* within the system root to ensure two things:
//
// 1) Any DSOs that are linked in from this tree or from the install path
// above must be present in the system root and found via an
// appropriate rpath.
// 2) There must not be libraries installed into
// <prefix>/<triple>/<libdir> unless they should be preferred over
// those within the system root.
//
// Note that this matches the GCC behavior. See the below comment for where
// Clang diverges from GCC's behavior.
addPathIfExists(LibPath + "/../" + GCCTriple.str() + "/lib/../" + Multilib +
GCCInstallation.getMIPSABIDirSuffix(),
Paths);
// If the GCC installation we found is inside of the sysroot, we want to
// prefer libraries installed in the parent prefix of the GCC installation.
// It is important to *not* use these paths when the GCC installation is
// outside of the system root as that can pick up unintended libraries.
// This usually happens when there is an external cross compiler on the
// host system, and a more minimal sysroot available that is the target of
// the cross. Note that GCC does include some of these directories in some
// configurations but this seems somewhere between questionable and simply
// a bug.
if (StringRef(LibPath).startswith(SysRoot)) {
addPathIfExists(LibPath + "/" + MultiarchTriple, Paths);
addPathIfExists(LibPath + "/../" + Multilib, Paths);
}
}
addPathIfExists(SysRoot + "/lib/" + MultiarchTriple, Paths);
addPathIfExists(SysRoot + "/lib/../" + Multilib, Paths);
addPathIfExists(SysRoot + "/usr/lib/" + MultiarchTriple, Paths);
addPathIfExists(SysRoot + "/usr/lib/../" + Multilib, Paths);
// Try walking via the GCC triple path in case of biarch or multiarch GCC
// installations with strange symlinks.
if (GCCInstallation.isValid()) {
addPathIfExists(SysRoot + "/usr/lib/" + GCCInstallation.getTriple().str() +
"/../../" + Multilib, Paths);
// Add the non-multilib suffixed paths (if potentially different).
const std::string &LibPath = GCCInstallation.getParentLibPath();
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
if (!GCCInstallation.getBiarchSuffix().empty())
addPathIfExists(GCCInstallation.getInstallPath() +
GCCInstallation.getMIPSABIDirSuffix(), Paths);
// See comments above on the multilib variant for details of why this is
// included even from outside the sysroot.
addPathIfExists(LibPath + "/../" + GCCTriple.str() +
"/lib" + GCCInstallation.getMIPSABIDirSuffix(), Paths);
// See comments above on the multilib variant for details of why this is
// only included from within the sysroot.
if (StringRef(LibPath).startswith(SysRoot))
addPathIfExists(LibPath, Paths);
}
addPathIfExists(SysRoot + "/lib", Paths);
addPathIfExists(SysRoot + "/usr/lib", Paths);
}
bool FreeBSD::HasNativeLLVMSupport() const {
return true;
}
bool Linux::HasNativeLLVMSupport() const {
return true;
}
Tool *Linux::buildLinker() const {
return new tools::gnutools::Link(*this);
}
Tool *Linux::buildAssembler() const {
return new tools::gnutools::Assemble(*this);
-}
-
-void Linux::addClangTargetOptions(const ArgList &DriverArgs,
- ArgStringList &CC1Args) const {
- const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
- bool UseInitArrayDefault =
- !V.isOlderThan(4, 7, 0) ||
- getTriple().getArch() == llvm::Triple::aarch64 ||
- getTriple().getEnvironment() == llvm::Triple::Android;
- if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
- options::OPT_fno_use_init_array,
- UseInitArrayDefault))
- CC1Args.push_back("-fuse-init-array");
}
std::string Linux::computeSysRoot() const {
if (!getDriver().SysRoot.empty())
return getDriver().SysRoot;
if (!GCCInstallation.isValid() || !isMipsArch(getTriple().getArch()))
return std::string();
// Standalone MIPS toolchains use different names for the sysroot
// directory and put it in different places. Here we check some
// known variants.
const StringRef InstallDir = GCCInstallation.getInstallPath();
const StringRef TripleStr = GCCInstallation.getTriple().str();
const StringRef MIPSABIDirSuffix = GCCInstallation.getMIPSABIDirSuffix();
std::string Path = (InstallDir + "/../../../../" + TripleStr + "/libc" +
MIPSABIDirSuffix).str();
if (llvm::sys::fs::exists(Path))
return Path;
Path = (InstallDir + "/../../../../sysroot" + MIPSABIDirSuffix).str();
if (llvm::sys::fs::exists(Path))
return Path;
return std::string();
}
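// For example, for a GCC installation rooted at
// <prefix>/lib/gcc/mips-linux-gnu/4.7.2, the candidates checked are
// <prefix>/mips-linux-gnu/libc<ABISuffix> and <prefix>/sysroot<ABISuffix>.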
void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
const Driver &D = getDriver();
std::string SysRoot = computeSysRoot();
if (DriverArgs.hasArg(options::OPT_nostdinc))
return;
if (!DriverArgs.hasArg(options::OPT_nostdlibinc))
addSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/local/include");
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
addSystemInclude(DriverArgs, CC1Args, P.str());
}
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
// Check for configure-time C include directories.
StringRef CIncludeDirs(C_INCLUDE_DIRS);
if (CIncludeDirs != "") {
SmallVector<StringRef, 5> dirs;
CIncludeDirs.split(dirs, ":");
for (SmallVectorImpl<StringRef>::iterator I = dirs.begin(), E = dirs.end();
I != E; ++I) {
StringRef Prefix = llvm::sys::path::is_absolute(*I) ? SysRoot : "";
addExternCSystemInclude(DriverArgs, CC1Args, Prefix + *I);
}
return;
}
// Lacking those, try to detect the correct set of system includes for the
// target triple.
// Sourcery CodeBench and modern FSF Mips toolchains put extern C
// system includes under three additional directories.
if (GCCInstallation.isValid() && isMipsArch(getTriple().getArch())) {
addExternCSystemIncludeIfExists(
DriverArgs, CC1Args, GCCInstallation.getInstallPath() + "/include");
addExternCSystemIncludeIfExists(
DriverArgs, CC1Args,
GCCInstallation.getInstallPath() + "/../../../../" +
GCCInstallation.getTriple().str() + "/libc/usr/include");
addExternCSystemIncludeIfExists(
DriverArgs, CC1Args,
GCCInstallation.getInstallPath() + "/../../../../sysroot/usr/include");
}
// Implement generic Debian multiarch support.
const StringRef X86_64MultiarchIncludeDirs[] = {
"/usr/include/x86_64-linux-gnu",
// FIXME: These are older forms of multiarch. It's not clear that they're
// in use in any released version of Debian, so we should consider
// removing them.
"/usr/include/i686-linux-gnu/64", "/usr/include/i486-linux-gnu/64"
};
const StringRef X86MultiarchIncludeDirs[] = {
"/usr/include/i386-linux-gnu",
// FIXME: These are older forms of multiarch. It's not clear that they're
// in use in any released version of Debian, so we should consider
// removing them.
"/usr/include/x86_64-linux-gnu/32", "/usr/include/i686-linux-gnu",
"/usr/include/i486-linux-gnu"
};
const StringRef AArch64MultiarchIncludeDirs[] = {
"/usr/include/aarch64-linux-gnu"
};
const StringRef ARMMultiarchIncludeDirs[] = {
"/usr/include/arm-linux-gnueabi"
};
const StringRef ARMHFMultiarchIncludeDirs[] = {
"/usr/include/arm-linux-gnueabihf"
};
const StringRef MIPSMultiarchIncludeDirs[] = {
"/usr/include/mips-linux-gnu"
};
const StringRef MIPSELMultiarchIncludeDirs[] = {
"/usr/include/mipsel-linux-gnu"
};
const StringRef PPCMultiarchIncludeDirs[] = {
"/usr/include/powerpc-linux-gnu"
};
const StringRef PPC64MultiarchIncludeDirs[] = {
"/usr/include/powerpc64-linux-gnu"
};
ArrayRef<StringRef> MultiarchIncludeDirs;
if (getTriple().getArch() == llvm::Triple::x86_64) {
MultiarchIncludeDirs = X86_64MultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::x86) {
MultiarchIncludeDirs = X86MultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::aarch64) {
MultiarchIncludeDirs = AArch64MultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::arm) {
if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs;
else
MultiarchIncludeDirs = ARMMultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::mips) {
MultiarchIncludeDirs = MIPSMultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::mipsel) {
MultiarchIncludeDirs = MIPSELMultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::ppc) {
MultiarchIncludeDirs = PPCMultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::ppc64) {
MultiarchIncludeDirs = PPC64MultiarchIncludeDirs;
}
for (ArrayRef<StringRef>::iterator I = MultiarchIncludeDirs.begin(),
E = MultiarchIncludeDirs.end();
I != E; ++I) {
if (llvm::sys::fs::exists(SysRoot + *I)) {
addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + *I);
break;
}
}
if (getTriple().getOS() == llvm::Triple::RTEMS)
return;
// Add an include of '/include' directly. This isn't provided by default by
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
// add even when Clang is acting as if it were a system compiler.
addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/include");
addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include");
}
/// \brief Helper to add the three variant paths for a libstdc++ installation.
/*static*/ bool Linux::addLibStdCXXIncludePaths(Twine Base, Twine TargetArchDir,
const ArgList &DriverArgs,
ArgStringList &CC1Args) {
if (!llvm::sys::fs::exists(Base))
return false;
addSystemInclude(DriverArgs, CC1Args, Base);
addSystemInclude(DriverArgs, CC1Args, Base + "/" + TargetArchDir);
addSystemInclude(DriverArgs, CC1Args, Base + "/backward");
return true;
}
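// For example, a call with Base "/usr/include/c++/4.8" and TargetArchDir
// "x86_64-linux-gnu" adds /usr/include/c++/4.8,
// /usr/include/c++/4.8/x86_64-linux-gnu and /usr/include/c++/4.8/backward,
// provided the base directory exists.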
/// \brief Helper to add an extra variant path for an (Ubuntu) multilib
/// libstdc++ installation.
/*static*/ bool Linux::addLibStdCXXIncludePaths(Twine Base, Twine Suffix,
Twine TargetArchDir,
Twine BiarchSuffix,
Twine MIPSABIDirSuffix,
const ArgList &DriverArgs,
ArgStringList &CC1Args) {
if (!addLibStdCXXIncludePaths(Base + Suffix,
TargetArchDir + MIPSABIDirSuffix + BiarchSuffix,
DriverArgs, CC1Args))
return false;
addSystemInclude(DriverArgs, CC1Args, Base + "/" + TargetArchDir + Suffix
+ MIPSABIDirSuffix + BiarchSuffix);
return true;
}
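// For example, on an Ubuntu-style layout a call with Base "/usr/include/c++",
// Suffix "/4.7" and TargetArchDir "x86_64-linux-gnu" (empty biarch and MIPS
// suffixes assumed) registers the multilib variant
// /usr/include/c++/x86_64-linux-gnu/4.7 alongside the three standard paths
// under /usr/include/c++/4.7.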
void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
DriverArgs.hasArg(options::OPT_nostdincxx))
return;
// Check if libc++ has been enabled and provide its include paths if so.
if (GetCXXStdlibType(DriverArgs) == ToolChain::CST_Libcxx) {
// libc++ is always installed at a fixed path on Linux currently.
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/v1");
return;
}
// We need a detected GCC installation on Linux to provide libstdc++'s
// headers. We handled the libc++ case above.
if (!GCCInstallation.isValid())
return;
// By default, look for the C++ headers in an include directory adjacent to
// the lib directory of the GCC installation. Note that this is expected to
// be equivalent to '/usr/include/c++/X.Y' in almost all cases.
StringRef LibDir = GCCInstallation.getParentLibPath();
StringRef InstallDir = GCCInstallation.getInstallPath();
StringRef TripleStr = GCCInstallation.getTriple().str();
StringRef MIPSABIDirSuffix = GCCInstallation.getMIPSABIDirSuffix();
StringRef BiarchSuffix = GCCInstallation.getBiarchSuffix();
const GCCVersion &Version = GCCInstallation.getVersion();
if (addLibStdCXXIncludePaths(LibDir.str() + "/../include",
"/c++/" + Version.Text, TripleStr, BiarchSuffix,
MIPSABIDirSuffix, DriverArgs, CC1Args))
return;
const std::string IncludePathCandidates[] = {
// Gentoo is weird and places its headers inside the GCC install, so if the
// first attempt to find the headers fails, try these patterns.
InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." +
Version.MinorStr,
InstallDir.str() + "/include/g++-v" + Version.MajorStr,
// Android standalone toolchain has C++ headers in yet another place.
LibDir.str() + "/../" + TripleStr.str() + "/include/c++/" + Version.Text,
// Freescale SDK C++ headers are directly in <sysroot>/usr/include/c++,
// without a subdirectory corresponding to the gcc version.
LibDir.str() + "/../include/c++",
};
for (unsigned i = 0; i < llvm::array_lengthof(IncludePathCandidates); ++i) {
if (addLibStdCXXIncludePaths(IncludePathCandidates[i],
TripleStr + MIPSABIDirSuffix + BiarchSuffix,
DriverArgs, CC1Args))
break;
}
}
bool Linux::isPIEDefault() const {
return getSanitizerArgs().hasZeroBaseShadow();
}
/// DragonFly - DragonFly tool chain which can call as(1) and ld(1) directly.
DragonFly::DragonFly(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
// Path mangling to find libexec
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
if (llvm::sys::fs::exists("/usr/lib/gcc47"))
getFilePaths().push_back("/usr/lib/gcc47");
else
getFilePaths().push_back("/usr/lib/gcc44");
}
Tool *DragonFly::buildAssembler() const {
return new tools::dragonfly::Assemble(*this);
}
Tool *DragonFly::buildLinker() const {
return new tools::dragonfly::Link(*this);
}
/// XCore tool chain
XCore::XCore(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args) : ToolChain(D, Triple, Args) {
// ProgramPaths are found via 'PATH' environment variable.
}
Tool *XCore::buildAssembler() const {
return new tools::XCore::Assemble(*this);
}
Tool *XCore::buildLinker() const {
return new tools::XCore::Link(*this);
}
bool XCore::isPICDefault() const {
return false;
}
bool XCore::isPIEDefault() const {
return false;
}
bool XCore::isPICDefaultForced() const {
return false;
}
bool XCore::SupportsProfiling() const {
return false;
}
bool XCore::hasBlocksRuntime() const {
return false;
}
void XCore::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdinc) ||
DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
if (const char *cl_include_dir = getenv("XCC_C_INCLUDE_PATH")) {
SmallVector<StringRef, 4> Dirs;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,'\0'};
StringRef(cl_include_dir).split(Dirs, StringRef(EnvPathSeparatorStr));
ArrayRef<StringRef> DirVec(Dirs);
addSystemIncludes(DriverArgs, CC1Args, DirVec);
}
}
void XCore::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
CC1Args.push_back("-nostdsysteminc");
}
void XCore::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdinc) ||
DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
if (const char *cl_include_dir = getenv("XCC_CPLUS_INCLUDE_PATH")) {
SmallVector<StringRef, 4> Dirs;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,'\0'};
StringRef(cl_include_dir).split(Dirs, StringRef(EnvPathSeparatorStr));
ArrayRef<StringRef> DirVec(Dirs);
addSystemIncludes(DriverArgs, CC1Args, DirVec);
}
}
void XCore::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// We don't output any lib args. This is handled by xcc.
}
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains.h (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains.h (revision 265925)
@@ -1,706 +1,706 @@
//===--- ToolChains.h - ToolChain Implementations ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_LIB_DRIVER_TOOLCHAINS_H_
#define CLANG_LIB_DRIVER_TOOLCHAINS_H_
#include "Tools.h"
#include "clang/Basic/VersionTuple.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Compiler.h"
#include <vector>
#include <set>
namespace clang {
namespace driver {
namespace toolchains {
/// Generic_GCC - A tool chain using the 'gcc' command to perform
/// all subcommands; this relies on gcc translating the majority of
/// command line options.
class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
protected:
/// \brief Struct to store and manipulate GCC versions.
///
/// We rely on assumptions about the form and structure of GCC version
/// numbers: they consist of at most three '.'-separated components, and each
/// component is a non-negative integer except for the last component. For
/// the last component we are very flexible in order to tolerate release
/// candidates or 'x' wildcards.
///
/// Note that the ordering established among GCCVersions is based on the
/// preferred version string to use. For example we prefer versions without
/// a hard-coded patch number to those with a hard coded patch number.
///
/// Currently this doesn't provide any logic for textual suffixes to patches
/// in the way that (for example) Debian's version format does. If that ever
/// becomes necessary, it can be added.
struct GCCVersion {
/// \brief The unparsed text of the version.
std::string Text;
/// \brief The parsed major, minor, and patch numbers.
int Major, Minor, Patch;
/// \brief The text of the parsed major, and major+minor versions.
std::string MajorStr, MinorStr;
/// \brief Any textual suffix on the patch number.
std::string PatchSuffix;
static GCCVersion Parse(StringRef VersionText);
bool isOlderThan(int RHSMajor, int RHSMinor, int RHSPatch,
StringRef RHSPatchSuffix = StringRef()) const;
bool operator<(const GCCVersion &RHS) const {
return isOlderThan(RHS.Major, RHS.Minor, RHS.Patch, RHS.PatchSuffix);
}
bool operator>(const GCCVersion &RHS) const { return RHS < *this; }
bool operator<=(const GCCVersion &RHS) const { return !(*this > RHS); }
bool operator>=(const GCCVersion &RHS) const { return !(*this < RHS); }
};
/// \brief This is a class to find a viable GCC installation for Clang to
/// use.
///
/// This class tries to find a GCC installation on the system, and report
/// information about it. It starts from the host information provided to the
/// Driver, and has logic for fuzzing that where appropriate.
class GCCInstallationDetector {
bool IsValid;
const Driver &D;
llvm::Triple GCCTriple;
// FIXME: These might be better as path objects.
std::string GCCInstallPath;
std::string GCCBiarchSuffix;
std::string GCCParentLibPath;
std::string GCCMIPSABIDirSuffix;
GCCVersion Version;
// We retain the list of install paths that were considered and rejected in
// order to print out detailed information in verbose mode.
std::set<std::string> CandidateGCCInstallPaths;
public:
GCCInstallationDetector(const Driver &D) : IsValid(false), D(D) {}
void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args);
/// \brief Check whether we detected a valid GCC install.
bool isValid() const { return IsValid; }
/// \brief Get the GCC triple for the detected install.
const llvm::Triple &getTriple() const { return GCCTriple; }
/// \brief Get the detected GCC installation path.
StringRef getInstallPath() const { return GCCInstallPath; }
/// \brief Get the detected GCC installation path suffix for the bi-arch
/// target variant.
StringRef getBiarchSuffix() const { return GCCBiarchSuffix; }
/// \brief Get the detected GCC parent lib path.
StringRef getParentLibPath() const { return GCCParentLibPath; }
/// \brief Get the detected GCC MIPS ABI directory suffix.
///
/// This is used as a suffix both to the install directory of GCC and as
/// a suffix to its parent lib path in order to select a MIPS ABI-specific
/// subdirectory.
///
/// This will always be empty for any non-MIPS target.
///
// FIXME: This probably shouldn't exist at all, and should be factored
// into the multiarch and/or biarch support. Please don't add more uses of
// this interface, it is meant as a legacy crutch for the MIPS driver
// logic.
StringRef getMIPSABIDirSuffix() const { return GCCMIPSABIDirSuffix; }
/// \brief Get the detected GCC version string.
const GCCVersion &getVersion() const { return Version; }
/// \brief Print information about the detected GCC installation.
void print(raw_ostream &OS) const;
private:
static void
CollectLibDirsAndTriples(const llvm::Triple &TargetTriple,
const llvm::Triple &BiarchTriple,
SmallVectorImpl<StringRef> &LibDirs,
SmallVectorImpl<StringRef> &TripleAliases,
SmallVectorImpl<StringRef> &BiarchLibDirs,
SmallVectorImpl<StringRef> &BiarchTripleAliases);
void ScanLibDirForGCCTriple(llvm::Triple::ArchType TargetArch,
const llvm::opt::ArgList &Args,
const std::string &LibDir,
StringRef CandidateTriple,
bool NeedsBiarchSuffix = false);
void findMIPSABIDirSuffix(std::string &Suffix,
llvm::Triple::ArchType TargetArch, StringRef Path,
const llvm::opt::ArgList &Args);
};
GCCInstallationDetector GCCInstallation;
public:
Generic_GCC(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
~Generic_GCC();
virtual void printVerboseInfo(raw_ostream &OS) const;
virtual bool IsUnwindTablesDefault() const;
virtual bool isPICDefault() const;
virtual bool isPIEDefault() const;
virtual bool isPICDefaultForced() const;
+ virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const;
protected:
virtual Tool *getTool(Action::ActionClass AC) const;
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
/// \name ToolChain Implementation Helper Functions
/// @{
/// \brief Check whether the target triple's architecture is 64-bits.
bool isTarget64Bit() const { return getTriple().isArch64Bit(); }
/// \brief Check whether the target triple's architecture is 32-bits.
bool isTarget32Bit() const { return getTriple().isArch32Bit(); }
/// @}
private:
mutable OwningPtr<tools::gcc::Preprocess> Preprocess;
mutable OwningPtr<tools::gcc::Precompile> Precompile;
mutable OwningPtr<tools::gcc::Compile> Compile;
};
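The ordering contract documented on GCCVersion above is essentially lexicographic over (Major, Minor, Patch), with extra tolerance in the last component. A compact standard-library sketch of that comparison, assuming plain numeric components only (the real Parse also accepts 'x' wildcards and textual patch suffixes, which this omits):

#include <iostream>
#include <sstream>
#include <string>
#include <tuple>

struct MiniGCCVersion {            // simplified stand-in for GCCVersion
  int Major = -1, Minor = -1, Patch = -1;
  static MiniGCCVersion parse(const std::string &text) {
    MiniGCCVersion v;
    char dot;
    std::istringstream is(text);
    is >> v.Major >> dot >> v.Minor >> dot >> v.Patch; // no wildcards/suffixes
    return v;
  }
  bool operator<(const MiniGCCVersion &rhs) const {
    return std::tie(Major, Minor, Patch) <
           std::tie(rhs.Major, rhs.Minor, rhs.Patch);
  }
};

int main() {
  std::cout << (MiniGCCVersion::parse("4.8.2") < MiniGCCVersion::parse("4.9.0"))
            << '\n'; // prints 1: 4.8.2 orders before 4.9.0
}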
/// Darwin - The base Darwin tool chain.
class LLVM_LIBRARY_VISIBILITY Darwin : public ToolChain {
public:
/// The host version.
unsigned DarwinVersion[3];
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
virtual Tool *getTool(Action::ActionClass AC) const;
private:
mutable OwningPtr<tools::darwin::Lipo> Lipo;
mutable OwningPtr<tools::darwin::Dsymutil> Dsymutil;
mutable OwningPtr<tools::darwin::VerifyDebug> VerifyDebug;
/// Whether the information on the target has been initialized.
//
// FIXME: This should be eliminated. What we want to do is make this part of
// the "default target for arguments" selection process, once we get out of
// the argument translation business.
mutable bool TargetInitialized;
/// Whether we are targeting iPhoneOS target.
mutable bool TargetIsIPhoneOS;
/// Whether we are targeting the iPhoneOS simulator target.
mutable bool TargetIsIPhoneOSSimulator;
/// The OS version we are targeting.
mutable VersionTuple TargetVersion;
private:
/// The default macosx-version-min of this tool chain; empty until
/// initialized.
std::string MacosxVersionMin;
/// The default ios-version-min of this tool chain; empty until
/// initialized.
std::string iOSVersionMin;
private:
void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const;
public:
Darwin(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
~Darwin();
std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
types::ID InputType) const;
/// @name Darwin Specific Toolchain API
/// {
// FIXME: Eliminate these ...Target functions and derive separate tool chains
// for these targets and put version in constructor.
void setTarget(bool IsIPhoneOS, unsigned Major, unsigned Minor,
unsigned Micro, bool IsIOSSim) const {
assert((!IsIOSSim || IsIPhoneOS) && "Unexpected deployment target!");
// FIXME: For now, allow reinitialization as long as values don't
// change. This will go away when we move away from argument translation.
if (TargetInitialized && TargetIsIPhoneOS == IsIPhoneOS &&
TargetIsIPhoneOSSimulator == IsIOSSim &&
TargetVersion == VersionTuple(Major, Minor, Micro))
return;
assert(!TargetInitialized && "Target already initialized!");
TargetInitialized = true;
TargetIsIPhoneOS = IsIPhoneOS;
TargetIsIPhoneOSSimulator = IsIOSSim;
TargetVersion = VersionTuple(Major, Minor, Micro);
}
bool isTargetIPhoneOS() const {
assert(TargetInitialized && "Target not initialized!");
return TargetIsIPhoneOS;
}
bool isTargetIOSSimulator() const {
assert(TargetInitialized && "Target not initialized!");
return TargetIsIPhoneOSSimulator;
}
bool isTargetMacOS() const {
return !isTargetIOSSimulator() && !isTargetIPhoneOS();
}
bool isTargetInitialized() const { return TargetInitialized; }
VersionTuple getTargetVersion() const {
assert(TargetInitialized && "Target not initialized!");
return TargetVersion;
}
/// getDarwinArchName - Get the "Darwin" arch name for a particular compiler
/// invocation. For example, Darwin treats different ARM variations as
/// distinct architectures.
StringRef getDarwinArchName(const llvm::opt::ArgList &Args) const;
bool isIPhoneOSVersionLT(unsigned V0, unsigned V1=0, unsigned V2=0) const {
assert(isTargetIPhoneOS() && "Unexpected call for OS X target!");
return TargetVersion < VersionTuple(V0, V1, V2);
}
bool isMacosxVersionLT(unsigned V0, unsigned V1=0, unsigned V2=0) const {
assert(!isTargetIPhoneOS() && "Unexpected call for iPhoneOS target!");
return TargetVersion < VersionTuple(V0, V1, V2);
}
/// AddLinkARCArgs - Add the linker arguments to link the ARC runtime library.
virtual void AddLinkARCArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const = 0;
/// AddLinkRuntimeLibArgs - Add the linker arguments to link the compiler
/// runtime library.
virtual void
AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const = 0;
/// }
/// @name ToolChain Implementation
/// {
virtual types::ID LookupTypeForExtension(const char *Ext) const;
virtual bool HasNativeLLVMSupport() const;
virtual ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const;
virtual bool hasBlocksRuntime() const;
virtual llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args,
const char *BoundArch) const;
virtual bool IsBlocksDefault() const {
// Always allow blocks on Darwin; users interested in versioning are
// expected to use /usr/include/Blocks.h.
return true;
}
virtual bool IsIntegratedAssemblerDefault() const {
// Default integrated assembler to on for Darwin.
return true;
}
virtual bool IsMathErrnoDefault() const {
return false;
}
virtual bool IsEncodeExtendedBlockSignatureDefault() const {
return true;
}
virtual bool IsObjCNonFragileABIDefault() const {
// Non-fragile ABI is default for everything but i386.
return getTriple().getArch() != llvm::Triple::x86;
}
virtual bool UseObjCMixedDispatch() const {
// This is only used with the non-fragile ABI and non-legacy dispatch.
// Mixed dispatch is used everywhere except OS X before 10.6.
return !(!isTargetIPhoneOS() && isMacosxVersionLT(10, 6));
}
virtual bool IsUnwindTablesDefault() const;
virtual unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const {
// Stack protectors default to on for user code on 10.5,
// and for everything in 10.6 and beyond
return isTargetIPhoneOS() ||
(!isMacosxVersionLT(10, 6) ||
(!isMacosxVersionLT(10, 5) && !KernelOrKext));
}
virtual RuntimeLibType GetDefaultRuntimeLibType() const {
return ToolChain::RLT_CompilerRT;
}
virtual bool isPICDefault() const;
virtual bool isPIEDefault() const;
virtual bool isPICDefaultForced() const;
virtual bool SupportsProfiling() const;
virtual bool SupportsObjCGC() const;
virtual void CheckObjCARC() const;
virtual bool UseDwarfDebugFlags() const;
virtual bool UseSjLjExceptions() const;
/// }
};
/// DarwinClang - The Darwin toolchain used by Clang.
class LLVM_LIBRARY_VISIBILITY DarwinClang : public Darwin {
public:
DarwinClang(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
/// @name Darwin ToolChain Implementation
/// {
virtual void AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
void AddLinkRuntimeLib(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const char *DarwinStaticLib,
bool AlwaysLink = false) const;
virtual void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
virtual void AddCCKextLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
virtual void AddLinkARCArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
/// }
};
/// Darwin_Generic_GCC - Generic Darwin tool chain using gcc.
class LLVM_LIBRARY_VISIBILITY Darwin_Generic_GCC : public Generic_GCC {
public:
Darwin_Generic_GCC(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args)
: Generic_GCC(D, Triple, Args) {}
std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
types::ID InputType) const;
virtual bool isPICDefault() const { return false; }
};
class LLVM_LIBRARY_VISIBILITY Generic_ELF : public Generic_GCC {
virtual void anchor();
public:
Generic_ELF(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args)
: Generic_GCC(D, Triple, Args) {}
virtual bool IsIntegratedAssemblerDefault() const {
// Default integrated assembler to on for x86.
return (getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64);
}
};
class LLVM_LIBRARY_VISIBILITY AuroraUX : public Generic_GCC {
public:
AuroraUX(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY Solaris : public Generic_GCC {
public:
Solaris(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool IsIntegratedAssemblerDefault() const { return true; }
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {
public:
OpenBSD(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool IsMathErrnoDefault() const { return false; }
virtual bool IsObjCNonFragileABIDefault() const { return true; }
virtual bool isPIEDefault() const { return true; }
virtual unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const {
return 1;
}
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY Bitrig : public Generic_ELF {
public:
Bitrig(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool IsMathErrnoDefault() const { return false; }
virtual bool IsObjCNonFragileABIDefault() const { return true; }
virtual bool IsObjCLegacyDispatchDefault() const { return false; }
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
virtual unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const {
return 1;
}
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY FreeBSD : public Generic_ELF {
public:
FreeBSD(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool HasNativeLLVMSupport() const;
virtual bool IsMathErrnoDefault() const { return false; }
virtual bool IsObjCNonFragileABIDefault() const { return true; }
virtual CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const;
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual bool IsIntegratedAssemblerDefault() const {
if (getTriple().getArch() == llvm::Triple::ppc ||
getTriple().getArch() == llvm::Triple::ppc64)
return true;
return Generic_ELF::IsIntegratedAssemblerDefault();
}
virtual bool UseSjLjExceptions() const;
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY NetBSD : public Generic_ELF {
public:
NetBSD(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool IsMathErrnoDefault() const { return false; }
virtual bool IsObjCNonFragileABIDefault() const { return true; }
virtual CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const;
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual bool IsUnwindTablesDefault() const {
return true;
}
virtual bool IsIntegratedAssemblerDefault() const {
if (getTriple().getArch() == llvm::Triple::ppc)
return true;
return Generic_ELF::IsIntegratedAssemblerDefault();
}
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY Minix : public Generic_ELF {
public:
Minix(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY DragonFly : public Generic_ELF {
public:
DragonFly(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool IsMathErrnoDefault() const { return false; }
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
};
class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF {
public:
Linux(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool HasNativeLLVMSupport() const;
virtual void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
- virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const;
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual bool isPIEDefault() const;
std::string Linker;
std::vector<std::string> ExtraOpts;
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
private:
static bool addLibStdCXXIncludePaths(Twine Base, Twine Suffix,
Twine TargetArchDir,
Twine BiarchSuffix,
Twine MIPSABIDirSuffix,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args);
static bool addLibStdCXXIncludePaths(Twine Base, Twine TargetArchDir,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args);
std::string computeSysRoot() const;
};
class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux {
protected:
GCCVersion GCCLibAndIncVersion;
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
public:
Hexagon_TC(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
~Hexagon_TC();
virtual void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const;
StringRef GetGCCLibAndIncVersion() const { return GCCLibAndIncVersion.Text; }
static std::string GetGnuDir(const std::string &InstalledDir);
static StringRef GetTargetCPU(const llvm::opt::ArgList &Args);
};
/// TCEToolChain - A tool chain using the llvm bitcode tools to perform
/// all subcommands. See http://tce.cs.tut.fi for our peculiar target.
class LLVM_LIBRARY_VISIBILITY TCEToolChain : public ToolChain {
public:
TCEToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
~TCEToolChain();
bool IsMathErrnoDefault() const;
bool isPICDefault() const;
bool isPIEDefault() const;
bool isPICDefaultForced() const;
};
class LLVM_LIBRARY_VISIBILITY Windows : public ToolChain {
public:
Windows(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
virtual bool IsIntegratedAssemblerDefault() const;
virtual bool IsUnwindTablesDefault() const;
virtual bool isPICDefault() const;
virtual bool isPIEDefault() const;
virtual bool isPICDefaultForced() const;
virtual void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual void
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
protected:
virtual Tool *buildLinker() const;
virtual Tool *buildAssembler() const;
};
class LLVM_LIBRARY_VISIBILITY XCore : public ToolChain {
public:
XCore(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
protected:
virtual Tool *buildAssembler() const;
virtual Tool *buildLinker() const;
public:
virtual bool isPICDefault() const;
virtual bool isPIEDefault() const;
virtual bool isPICDefaultForced() const;
virtual bool SupportsProfiling() const;
virtual bool hasBlocksRuntime() const;
virtual void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
virtual void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif
Index: head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp (revision 265925)
@@ -1,7191 +1,7190 @@
//===--- Tools.cpp - Tools Implementations --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Tools.h"
#include "InputInfo.h"
#include "ToolChains.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Util.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include <sys/stat.h>
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
/// CheckPreprocessingOptions - Perform some validation of preprocessing
/// arguments that is shared with gcc.
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_C, options::OPT_CC))
if (!Args.hasArg(options::OPT_E) && !D.CCCIsCPP())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getAsString(Args) << "-E";
}
/// CheckCodeGenerationOptions - Perform some validation of code generation
/// arguments that is shared with gcc.
static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
// In gcc, only ARM checks this, but it seems reasonable to check universally.
if (Args.hasArg(options::OPT_static))
if (const Arg *A = Args.getLastArg(options::OPT_dynamic,
options::OPT_mdynamic_no_pic))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-static";
}
// Quote target names for inclusion in GNU Make dependency files.
// Only the characters '$', '#', ' ', '\t' are quoted.
static void QuoteTarget(StringRef Target,
SmallVectorImpl<char> &Res) {
for (unsigned i = 0, e = Target.size(); i != e; ++i) {
switch (Target[i]) {
case ' ':
case '\t':
// Escape the preceding backslashes
for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j)
Res.push_back('\\');
// Escape the space/tab
Res.push_back('\\');
break;
case '$':
Res.push_back('$');
break;
case '#':
Res.push_back('\\');
break;
default:
break;
}
Res.push_back(Target[i]);
}
}
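QuoteTarget escapes only the four characters GNU Make treats specially in target names. A standalone restatement over std::string, handy for checking expected output; the function name quoteMakeTarget is illustrative, and the driver version appends into a SmallVectorImpl<char> instead:

#include <iostream>
#include <string>

// Same quoting rules as QuoteTarget, over std::string.
static std::string quoteMakeTarget(const std::string &target) {
  std::string res;
  for (std::size_t i = 0; i != target.size(); ++i) {
    switch (target[i]) {
    case ' ': case '\t':
      // Re-escape any backslashes that directly precede the space/tab.
      for (int j = static_cast<int>(i) - 1; j >= 0 && target[j] == '\\'; --j)
        res.push_back('\\');
      res.push_back('\\');
      break;
    case '$': res.push_back('$');  break; // '$' becomes '$$' for Make
    case '#': res.push_back('\\'); break; // '#' becomes '\#'
    default: break;
    }
    res.push_back(target[i]);
  }
  return res;
}

int main() {
  std::cout << quoteMakeTarget("foo bar$#.o") << '\n'; // foo\ bar$$\#.o
}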
static void addDirectoryList(const ArgList &Args,
ArgStringList &CmdArgs,
const char *ArgName,
const char *EnvVar) {
const char *DirList = ::getenv(EnvVar);
bool CombinedArg = false;
if (!DirList)
return; // Nothing to do.
StringRef Name(ArgName);
if (Name.equals("-I") || Name.equals("-L"))
CombinedArg = true;
StringRef Dirs(DirList);
if (Dirs.empty()) // Empty string should not add '.'.
return;
StringRef::size_type Delim;
while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) {
if (Delim == 0) { // Leading colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else {
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim)));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim)));
}
}
Dirs = Dirs.substr(Delim + 1);
}
if (Dirs.empty()) { // Trailing colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else { // Add the last path.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs));
}
}
}
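The separator handling in addDirectoryList is the subtle part: a leading, trailing, or doubled separator denotes the current directory. A small sketch reproducing just that splitting, assuming ':' and the combined '-I' form:

#include <iostream>
#include <string>
#include <vector>

// Sketch of addDirectoryList's splitting: every empty entry (leading,
// trailing, or doubled ':') stands for the current directory ".".
static std::vector<std::string> splitDirList(const std::string &dirs) {
  std::vector<std::string> out;
  if (dirs.empty())
    return out; // as in the driver: an empty variable adds nothing
  std::string::size_type begin = 0;
  for (;;) {
    std::string::size_type delim = dirs.find(':', begin);
    std::string entry = dirs.substr(begin, delim - begin);
    out.push_back(entry.empty() ? "." : entry);
    if (delim == std::string::npos)
      break;
    begin = delim + 1;
  }
  return out;
}

int main() {
  for (const std::string &d : splitDirList(":/usr/include::/opt/include:"))
    std::cout << "-I" << d << '\n'; // -I. -I/usr/include -I. -I/opt/include -I.
}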
static void AddLinkerInputs(const ToolChain &TC,
const InputInfoList &Inputs, const ArgList &Args,
ArgStringList &CmdArgs) {
const Driver &D = TC.getDriver();
// Add extra linker input arguments which are not treated as inputs
// (constructed via -Xarch_).
Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
if (!TC.HasNativeLLVMSupport()) {
// Don't try to pass LLVM inputs unless we have native support.
if (II.getType() == types::TY_LLVM_IR ||
II.getType() == types::TY_LTO_IR ||
II.getType() == types::TY_LLVM_BC ||
II.getType() == types::TY_LTO_BC)
D.Diag(diag::err_drv_no_linker_llvm_support)
<< TC.getTripleString();
}
// Add filenames immediately.
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
continue;
}
// Otherwise, this is a linker input argument.
const Arg &A = II.getInputArg();
// Handle reserved library options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) {
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
} else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext)) {
TC.AddCCKextLibArgs(Args, CmdArgs);
} else
A.renderAsInput(Args, CmdArgs);
}
// LIBRARY_PATH - included following the user specified library paths.
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
}
/// \brief Determine whether Objective-C automated reference counting is
/// enabled.
static bool isObjCAutoRefCount(const ArgList &Args) {
return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false);
}
/// \brief Determine whether we are linking the ObjC runtime.
static bool isObjCRuntimeLinked(const ArgList &Args) {
if (isObjCAutoRefCount(Args)) {
Args.ClaimAllArgs(options::OPT_fobjc_link_runtime);
return true;
}
return Args.hasArg(options::OPT_fobjc_link_runtime);
}
static void addProfileRT(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs,
llvm::Triple Triple) {
if (!(Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_fprofile_generate) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_coverage)))
return;
// GCC links libgcov.a by adding -L<inst>/gcc/lib/gcc/<triple>/<ver> -lgcov to
// the link line. We cannot do the same thing because unlike gcov there is a
// libprofile_rt.so. We used to use the -l:libprofile_rt.a syntax, but that is
// not supported by old linkers.
std::string ProfileRT =
std::string(TC.getDriver().Dir) + "/../lib/libprofile_rt.a";
CmdArgs.push_back(Args.MakeArgString(ProfileRT));
}
static bool forwardToGCC(const Option &O) {
// Don't forward inputs from the original command line. They are added from
// InputInfoList.
return O.getKind() != Option::InputClass &&
!O.hasFlag(options::DriverOption) &&
!O.hasFlag(options::LinkerInput);
}
void Clang::AddPreprocessingOptions(Compilation &C,
const JobAction &JA,
const Driver &D,
const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfo &Output,
const InputInfoList &Inputs) const {
Arg *A;
CheckPreprocessingOptions(D, Args);
Args.AddLastArg(CmdArgs, options::OPT_C);
Args.AddLastArg(CmdArgs, options::OPT_CC);
// Handle dependency file generation.
if ((A = Args.getLastArg(options::OPT_M, options::OPT_MM)) ||
(A = Args.getLastArg(options::OPT_MD)) ||
(A = Args.getLastArg(options::OPT_MMD))) {
// Determine the output location.
const char *DepFile;
if (Arg *MF = Args.getLastArg(options::OPT_MF)) {
DepFile = MF->getValue();
C.addFailureResultFile(DepFile, &JA);
} else if (Output.getType() == types::TY_Dependencies) {
DepFile = Output.getFilename();
} else if (A->getOption().matches(options::OPT_M) ||
A->getOption().matches(options::OPT_MM)) {
DepFile = "-";
} else {
DepFile = getDependencyFileName(Args, Inputs);
C.addFailureResultFile(DepFile, &JA);
}
CmdArgs.push_back("-dependency-file");
CmdArgs.push_back(DepFile);
// Add a default target if one wasn't specified.
if (!Args.hasArg(options::OPT_MT) && !Args.hasArg(options::OPT_MQ)) {
const char *DepTarget;
// If user provided -o, that is the dependency target, except
// when we are only generating a dependency file.
Arg *OutputOpt = Args.getLastArg(options::OPT_o);
if (OutputOpt && Output.getType() != types::TY_Dependencies) {
DepTarget = OutputOpt->getValue();
} else {
// Otherwise derive from the base input.
//
// FIXME: This should use the computed output file location.
SmallString<128> P(Inputs[0].getBaseInput());
llvm::sys::path::replace_extension(P, "o");
DepTarget = Args.MakeArgString(llvm::sys::path::filename(P));
}
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
QuoteTarget(DepTarget, Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
if (A->getOption().matches(options::OPT_M) ||
A->getOption().matches(options::OPT_MD))
CmdArgs.push_back("-sys-header-deps");
}
if (Args.hasArg(options::OPT_MG)) {
if (!A || A->getOption().matches(options::OPT_MD) ||
A->getOption().matches(options::OPT_MMD))
D.Diag(diag::err_drv_mg_requires_m_or_mm);
CmdArgs.push_back("-MG");
}
Args.AddLastArg(CmdArgs, options::OPT_MP);
// Convert all -MQ <target> args to -MT <quoted target>
for (arg_iterator it = Args.filtered_begin(options::OPT_MT,
options::OPT_MQ),
ie = Args.filtered_end(); it != ie; ++it) {
const Arg *A = *it;
A->claim();
if (A->getOption().matches(options::OPT_MQ)) {
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
QuoteTarget(A->getValue(), Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
// -MT flag - no change
} else {
A->render(Args, CmdArgs);
}
}
// Add -i* options, and automatically translate to
// -include-pch/-include-pth for transparent PCH support. It's
// wonky, but we include looking for .gch so we can support seamless
// replacement into a build system already set up to be generating
// .gch files.
bool RenderedImplicitInclude = false;
for (arg_iterator it = Args.filtered_begin(options::OPT_clang_i_Group),
ie = Args.filtered_end(); it != ie; ++it) {
const Arg *A = *it;
if (A->getOption().matches(options::OPT_include)) {
bool IsFirstImplicitInclude = !RenderedImplicitInclude;
RenderedImplicitInclude = true;
// Use PCH if the user requested it.
bool UsePCH = D.CCCUsePCH;
bool FoundPTH = false;
bool FoundPCH = false;
SmallString<128> P(A->getValue());
// We want the files to have a name like foo.h.pch. Add a dummy extension
// so that replace_extension does the right thing.
P += ".dummy";
if (UsePCH) {
llvm::sys::path::replace_extension(P, "pch");
if (llvm::sys::fs::exists(P.str()))
FoundPCH = true;
}
if (!FoundPCH) {
llvm::sys::path::replace_extension(P, "pth");
if (llvm::sys::fs::exists(P.str()))
FoundPTH = true;
}
if (!FoundPCH && !FoundPTH) {
llvm::sys::path::replace_extension(P, "gch");
if (llvm::sys::fs::exists(P.str())) {
FoundPCH = UsePCH;
FoundPTH = !UsePCH;
}
}
if (FoundPCH || FoundPTH) {
if (IsFirstImplicitInclude) {
A->claim();
if (UsePCH)
CmdArgs.push_back("-include-pch");
else
CmdArgs.push_back("-include-pth");
CmdArgs.push_back(Args.MakeArgString(P.str()));
continue;
} else {
// Ignore the PCH if not first on command line and emit warning.
D.Diag(diag::warn_drv_pch_not_first_include)
<< P.str() << A->getAsString(Args);
}
}
}
// Not translated, render as usual.
A->claim();
A->render(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs, options::OPT_D, options::OPT_U);
Args.AddAllArgs(CmdArgs, options::OPT_I_Group, options::OPT_F,
options::OPT_index_header_map);
// Add -Wp, and -Xpreprocessor if using the preprocessor.
// FIXME: There is a very unfortunate problem here, some troubled
// souls abuse -Wp, to pass preprocessor options in gcc syntax. To
// really support that we would have to parse and then translate
// those options. :(
Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA,
options::OPT_Xpreprocessor);
// -I- is a deprecated GCC feature, reject it.
if (Arg *A = Args.getLastArg(options::OPT_I_))
D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args);
// If we have a --sysroot, and don't have an explicit -isysroot flag, add an
// -isysroot to the CC1 invocation.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
if (!Args.hasArg(options::OPT_isysroot)) {
CmdArgs.push_back("-isysroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
}
}
// Parse additional include paths from environment variables.
// FIXME: We should probably sink the logic for handling these from the
// frontend into the driver. It will allow deleting 4 otherwise unused flags.
// CPATH - included following the user specified includes (but prior to
// builtin and standard includes).
addDirectoryList(Args, CmdArgs, "-I", "CPATH");
// C_INCLUDE_PATH - system includes enabled when compiling C.
addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH");
// CPLUS_INCLUDE_PATH - system includes enabled when compiling C++.
addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH");
// OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC.
addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH");
// OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++.
addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH");
// Add C++ include arguments, if needed.
if (types::isCXX(Inputs[0].getType()))
getToolChain().AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
// Add system include arguments.
getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs);
}
/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
/// CPU.
//
// FIXME: This is redundant with -mcpu, why does LLVM use this.
// FIXME: tblgen this, or kill it!
static const char *getLLVMArchSuffixForARM(StringRef CPU) {
return llvm::StringSwitch<const char *>(CPU)
.Case("strongarm", "v4")
.Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "v4t")
.Cases("arm720t", "arm9", "arm9tdmi", "v4t")
.Cases("arm920", "arm920t", "arm922t", "v4t")
.Cases("arm940t", "ep9312","v4t")
.Cases("arm10tdmi", "arm1020t", "v5")
.Cases("arm9e", "arm926ej-s", "arm946e-s", "v5e")
.Cases("arm966e-s", "arm968e-s", "arm10e", "v5e")
.Cases("arm1020e", "arm1022e", "xscale", "iwmmxt", "v5e")
.Cases("arm1136j-s", "arm1136jf-s", "arm1176jz-s", "v6")
.Cases("arm1176jzf-s", "mpcorenovfp", "mpcore", "v6")
.Cases("arm1156t2-s", "arm1156t2f-s", "v6t2")
.Cases("cortex-a5", "cortex-a7", "cortex-a8", "v7")
.Cases("cortex-a9", "cortex-a12", "cortex-a15", "v7")
.Cases("cortex-r4", "cortex-r5", "v7r")
.Case("cortex-m0", "v6m")
.Case("cortex-m3", "v7m")
.Case("cortex-m4", "v7em")
.Case("cortex-a9-mp", "v7f")
.Case("swift", "v7s")
.Cases("cortex-a53", "cortex-a57", "v8")
.Default("");
}
/// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
//
// FIXME: tblgen this.
static std::string getARMTargetCPU(const ArgList &Args,
const llvm::Triple &Triple) {
// FIXME: Warn on inconsistent use of -mcpu and -march.
// If we have -mcpu=, use that.
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef MCPU = A->getValue();
// Handle -mcpu=native.
if (MCPU == "native")
return llvm::sys::getHostCPUName();
else
return MCPU;
}
StringRef MArch;
if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
// Otherwise, if we have -march= choose the base CPU for that arch.
MArch = A->getValue();
} else {
// Otherwise, use the Arch from the triple.
MArch = Triple.getArchName();
}
if (Triple.getOS() == llvm::Triple::NetBSD) {
if (MArch == "armv6")
return "arm1176jzf-s";
}
// Handle -march=native.
std::string NativeMArch;
if (MArch == "native") {
std::string CPU = llvm::sys::getHostCPUName();
if (CPU != "generic") {
// Translate the native cpu into the architecture. The switch below will
// then chose the minimum cpu for that arch.
NativeMArch = std::string("arm") + getLLVMArchSuffixForARM(CPU);
MArch = NativeMArch;
}
}
return llvm::StringSwitch<const char *>(MArch)
.Cases("armv2", "armv2a","arm2")
.Case("armv3", "arm6")
.Case("armv3m", "arm7m")
.Case("armv4", "strongarm")
.Case("armv4t", "arm7tdmi")
.Cases("armv5", "armv5t", "arm10tdmi")
.Cases("armv5e", "armv5te", "arm1022e")
.Case("armv5tej", "arm926ej-s")
.Cases("armv6", "armv6k", "arm1136jf-s")
.Case("armv6j", "arm1136j-s")
.Cases("armv6z", "armv6zk", "arm1176jzf-s")
.Case("armv6t2", "arm1156t2-s")
.Cases("armv6m", "armv6-m", "cortex-m0")
.Cases("armv7", "armv7a", "armv7-a", "cortex-a8")
.Cases("armv7em", "armv7e-m", "cortex-m4")
.Cases("armv7f", "armv7-f", "cortex-a9-mp")
.Cases("armv7s", "armv7-s", "swift")
.Cases("armv7r", "armv7-r", "cortex-r4")
.Cases("armv7m", "armv7-m", "cortex-m3")
.Cases("armv8", "armv8a", "armv8-a", "cortex-a53")
.Case("ep9312", "ep9312")
.Case("iwmmxt", "iwmmxt")
.Case("xscale", "xscale")
// If all else failed, return the most base CPU with thumb interworking
// supported by LLVM.
.Default("arm7tdmi");
}
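getARMTargetCPU resolves an explicit -mcpu first, translates 'native' through host CPU detection, and otherwise maps -march (or the triple's arch name) to a baseline CPU. A trimmed lookup sketch covering a handful of the table rows above, with the same arm7tdmi fallback; the full StringSwitch in the driver is authoritative:

#include <iostream>
#include <map>
#include <string>

// A few rows of the -march -> default CPU table, same fallback as the driver.
static std::string armCPUForMArch(const std::string &march) {
  static const std::map<std::string, std::string> table = {
      {"armv4t", "arm7tdmi"},   {"armv5e", "arm1022e"},
      {"armv6", "arm1136jf-s"}, {"armv7a", "cortex-a8"},
      {"armv8", "cortex-a53"},
  };
  auto it = table.find(march);
  return it != table.end() ? it->second
                           : "arm7tdmi"; // most basic thumb-interworking CPU
}

int main() {
  std::cout << armCPUForMArch("armv7a") << '\n'; // cortex-a8
  std::cout << armCPUForMArch("armv9") << '\n';  // arm7tdmi fallback
}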
/// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are targeting.
//
// FIXME: tblgen this.
static std::string getAArch64TargetCPU(const ArgList &Args,
const llvm::Triple &Triple) {
// FIXME: Warn on inconsistent use of -mcpu and -march.
// If we have -mcpu=, use that.
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef MCPU = A->getValue();
// Handle -mcpu=native.
if (MCPU == "native")
return llvm::sys::getHostCPUName();
else
return MCPU;
}
return "generic";
}
// FIXME: Move to target hook.
static bool isSignedCharDefault(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
default:
return true;
case llvm::Triple::aarch64:
case llvm::Triple::arm:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
if (Triple.isOSDarwin())
return true;
return false;
case llvm::Triple::ppc64le:
case llvm::Triple::systemz:
case llvm::Triple::xcore:
return false;
}
}
static bool isNoCommonDefault(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
default:
return false;
case llvm::Triple::xcore:
return true;
}
}
// Handle -mfpu=.
//
// FIXME: Centralize feature selection, defaulting shouldn't be also in the
// frontend target.
static void getAArch64FPUFeatures(const Driver &D, const Arg *A,
const ArgList &Args,
std::vector<const char *> &Features) {
StringRef FPU = A->getValue();
if (FPU == "fp-armv8") {
Features.push_back("+fp-armv8");
} else if (FPU == "neon-fp-armv8") {
Features.push_back("+fp-armv8");
Features.push_back("+neon");
} else if (FPU == "crypto-neon-fp-armv8") {
Features.push_back("+fp-armv8");
Features.push_back("+neon");
Features.push_back("+crypto");
} else if (FPU == "neon") {
Features.push_back("+neon");
} else if (FPU == "none") {
Features.push_back("-fp-armv8");
Features.push_back("-crypto");
Features.push_back("-neon");
} else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
// Handle -mhwdiv=.
static void getARMHWDivFeatures(const Driver &D, const Arg *A,
const ArgList &Args,
std::vector<const char *> &Features) {
StringRef HWDiv = A->getValue();
if (HWDiv == "arm") {
Features.push_back("+hwdiv-arm");
Features.push_back("-hwdiv");
} else if (HWDiv == "thumb") {
Features.push_back("-hwdiv-arm");
Features.push_back("+hwdiv");
} else if (HWDiv == "arm,thumb" || HWDiv == "thumb,arm") {
Features.push_back("+hwdiv-arm");
Features.push_back("+hwdiv");
} else if (HWDiv == "none") {
Features.push_back("-hwdiv-arm");
Features.push_back("-hwdiv");
} else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
// Handle -mfpu=.
//
// FIXME: Centralize feature selection, defaulting shouldn't be also in the
// frontend target.
static void getARMFPUFeatures(const Driver &D, const Arg *A,
const ArgList &Args,
std::vector<const char *> &Features) {
StringRef FPU = A->getValue();
// Set the target features based on the FPU.
if (FPU == "fpa" || FPU == "fpe2" || FPU == "fpe3" || FPU == "maverick") {
// Disable any default FPU support.
Features.push_back("-vfp2");
Features.push_back("-vfp3");
Features.push_back("-neon");
} else if (FPU == "vfp3-d16" || FPU == "vfpv3-d16") {
Features.push_back("+vfp3");
Features.push_back("+d16");
Features.push_back("-neon");
} else if (FPU == "vfp") {
Features.push_back("+vfp2");
Features.push_back("-neon");
} else if (FPU == "vfp3" || FPU == "vfpv3") {
Features.push_back("+vfp3");
Features.push_back("-neon");
} else if (FPU == "fp-armv8") {
Features.push_back("+fp-armv8");
Features.push_back("-neon");
Features.push_back("-crypto");
} else if (FPU == "neon-fp-armv8") {
Features.push_back("+fp-armv8");
Features.push_back("+neon");
Features.push_back("-crypto");
} else if (FPU == "crypto-neon-fp-armv8") {
Features.push_back("+fp-armv8");
Features.push_back("+neon");
Features.push_back("+crypto");
} else if (FPU == "neon") {
Features.push_back("+neon");
} else if (FPU == "none") {
Features.push_back("-vfp2");
Features.push_back("-vfp3");
Features.push_back("-vfp4");
Features.push_back("-fp-armv8");
Features.push_back("-crypto");
Features.push_back("-neon");
} else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
// Select the float ABI as determined by -msoft-float, -mhard-float, and
// -mfloat-abi=.
static StringRef getARMFloatABI(const Driver &D,
const ArgList &Args,
const llvm::Triple &Triple) {
StringRef FloatABI;
if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
options::OPT_mhard_float,
options::OPT_mfloat_abi_EQ)) {
if (A->getOption().matches(options::OPT_msoft_float))
FloatABI = "soft";
else if (A->getOption().matches(options::OPT_mhard_float))
FloatABI = "hard";
else {
FloatABI = A->getValue();
if (FloatABI != "soft" && FloatABI != "softfp" && FloatABI != "hard") {
D.Diag(diag::err_drv_invalid_mfloat_abi)
<< A->getAsString(Args);
FloatABI = "soft";
}
}
}
// If unspecified, choose the default based on the platform.
if (FloatABI.empty()) {
switch (Triple.getOS()) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
case llvm::Triple::IOS: {
// Darwin defaults to "softfp" for v6 and v7.
//
// FIXME: Factor out an ARM class so we can cache the arch somewhere.
std::string ArchName =
getLLVMArchSuffixForARM(getARMTargetCPU(Args, Triple));
if (StringRef(ArchName).startswith("v6") ||
StringRef(ArchName).startswith("v7"))
FloatABI = "softfp";
else
FloatABI = "soft";
break;
}
case llvm::Triple::FreeBSD:
switch(Triple.getEnvironment()) {
case llvm::Triple::GNUEABIHF:
FloatABI = "hard";
break;
default:
// FreeBSD defaults to soft float
FloatABI = "soft";
break;
}
break;
default:
switch(Triple.getEnvironment()) {
case llvm::Triple::GNUEABIHF:
FloatABI = "hard";
break;
case llvm::Triple::GNUEABI:
FloatABI = "softfp";
break;
case llvm::Triple::EABI:
// EABI is always AAPCS, and if it was not marked 'hard', it's softfp
FloatABI = "softfp";
break;
case llvm::Triple::Android: {
std::string ArchName =
getLLVMArchSuffixForARM(getARMTargetCPU(Args, Triple));
if (StringRef(ArchName).startswith("v7"))
FloatABI = "softfp";
else
FloatABI = "soft";
break;
}
default:
// Assume "soft", but warn the user we are guessing.
FloatABI = "soft";
D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft";
break;
}
}
}
return FloatABI;
}
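Stripped of the flag parsing, getARMFloatABI's defaulting reduces to a short decision list. A condensed sketch, with OS and environment passed as plain strings purely for illustration (the driver switches on llvm::Triple enums), and explicit -m flags assumed to have been handled already:

#include <iostream>
#include <string>

static std::string defaultARMFloatABI(const std::string &os,
                                      const std::string &env,
                                      const std::string &archSuffix) {
  if (os == "darwin") // softfp on v6/v7, soft otherwise
    return (archSuffix.rfind("v6", 0) == 0 || archSuffix.rfind("v7", 0) == 0)
               ? "softfp" : "soft";
  if (env == "gnueabihf") return "hard";
  if (os == "freebsd")    return "soft"; // FreeBSD defaults to soft float
  if (env == "gnueabi" || env == "eabi") return "softfp";
  return "soft"; // guessed; the driver also emits a warning here
}

int main() {
  std::cout << defaultARMFloatABI("freebsd", "gnueabihf", "v6") << '\n'; // hard
  std::cout << defaultARMFloatABI("linux", "", "v7") << '\n';           // soft
}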
static void getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args,
std::vector<const char *> &Features) {
StringRef FloatABI = getARMFloatABI(D, Args, Triple);
// FIXME: Note, this is a hack, the LLVM backend doesn't actually use these
// yet (it uses the -mfloat-abi and -msoft-float options), and it is
// stripped out by the ARM target.
// Use software floating point operations?
if (FloatABI == "soft")
Features.push_back("+soft-float");
// Use software floating point argument passing?
if (FloatABI != "hard")
Features.push_back("+soft-float-abi");
// Honor -mfpu=.
if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ))
getARMFPUFeatures(D, A, Args, Features);
if (const Arg *A = Args.getLastArg(options::OPT_mhwdiv_EQ))
getARMHWDivFeatures(D, A, Args, Features);
// Setting -msoft-float effectively disables NEON because of the GCC
// implementation, although the same isn't true of VFP or VFP3.
if (FloatABI == "soft")
Features.push_back("-neon");
// En/disable crc
if (Arg *A = Args.getLastArg(options::OPT_mcrc,
options::OPT_mnocrc)) {
if (A->getOption().matches(options::OPT_mcrc))
Features.push_back("+crc");
else
Features.push_back("-crc");
}
}
void Clang::AddARMTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs,
bool KernelOrKext) const {
const Driver &D = getToolChain().getDriver();
// Get the effective triple, which takes into account the deployment target.
std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
llvm::Triple Triple(TripleStr);
std::string CPUName = getARMTargetCPU(Args, Triple);
// Select the ABI to use.
//
// FIXME: Support -meabi.
const char *ABIName = 0;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
ABIName = A->getValue();
} else if (Triple.isOSDarwin()) {
// The backend is hardwired to assume AAPCS for M-class processors, ensure
// the frontend matches that.
if (Triple.getEnvironment() == llvm::Triple::EABI ||
StringRef(CPUName).startswith("cortex-m")) {
ABIName = "aapcs";
} else {
ABIName = "apcs-gnu";
}
} else {
// Select the default based on the platform.
switch(Triple.getEnvironment()) {
case llvm::Triple::Android:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
ABIName = "aapcs-linux";
break;
case llvm::Triple::EABI:
ABIName = "aapcs";
break;
default:
ABIName = "apcs-gnu";
}
}
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
// Determine floating point ABI from the options & target defaults.
StringRef FloatABI = getARMFloatABI(D, Args, Triple);
if (FloatABI == "soft") {
// Floating point operations and argument passing are soft.
//
// FIXME: This changes CPP defines, we need -target-soft-float.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else if (FloatABI == "softfp") {
// Floating point operations are hard, but argument passing is soft.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == "hard" && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
// Kernel code has more strict alignment requirements.
if (KernelOrKext) {
if (!Triple.isiOS() || Triple.isOSVersionLT(6)) {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-long-calls");
}
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-strict-align");
// The kext linker doesn't know how to deal with movw/movt.
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-use-movt=0");
}
// Setting -mno-global-merge disables the codegen global merge pass. Setting
// -mglobal-merge has no effect as the pass is enabled by default.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-mno-global-merge");
}
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float,
true))
CmdArgs.push_back("-no-implicit-float");
// llvm does not support reserving registers in general. There is support
// for reserving r9 on ARM though (defined as a platform-specific register
// in ARM EABI).
if (Args.hasArg(options::OPT_ffixed_r9)) {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-reserve-r9");
}
}
// Get CPU and ABI names. They are not independent
// so we have to calculate them together.
static void getMipsCPUAndABI(const ArgList &Args,
const llvm::Triple &Triple,
StringRef &CPUName,
StringRef &ABIName) {
const char *DefMips32CPU = "mips32";
const char *DefMips64CPU = "mips64";
if (Arg *A = Args.getLastArg(options::OPT_march_EQ,
options::OPT_mcpu_EQ))
CPUName = A->getValue();
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
ABIName = A->getValue();
// Convert a GNU style Mips ABI name to the name
// accepted by LLVM Mips backend.
ABIName = llvm::StringSwitch<llvm::StringRef>(ABIName)
.Case("32", "o32")
.Case("64", "n64")
.Default(ABIName);
}
// Setup default CPU and ABI names.
if (CPUName.empty() && ABIName.empty()) {
switch (Triple.getArch()) {
default:
llvm_unreachable("Unexpected triple arch name");
case llvm::Triple::mips:
case llvm::Triple::mipsel:
CPUName = DefMips32CPU;
break;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
CPUName = DefMips64CPU;
break;
}
}
if (!ABIName.empty()) {
// Deduce CPU name from ABI name.
CPUName = llvm::StringSwitch<const char *>(ABIName)
.Cases("32", "o32", "eabi", DefMips32CPU)
.Cases("n32", "n64", "64", DefMips64CPU)
.Default("");
}
else if (!CPUName.empty()) {
// Deduce ABI name from CPU name.
ABIName = llvm::StringSwitch<const char *>(CPUName)
.Cases("mips32", "mips32r2", "o32")
.Cases("mips64", "mips64r2", "n64")
.Default("");
}
// FIXME: Warn on inconsistent cpu and abi usage.
}
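getMipsCPUAndABI deduces whichever of the CPU/ABI pair was left unset: a 32-bit ABI implies the default 32-bit CPU and vice versa. A tiny two-way sketch limited to the table rows shown above:

#include <iostream>
#include <string>

// Two-way deduction between MIPS CPU and ABI, mirroring the tables above.
static std::string cpuForABI(const std::string &abi) {
  if (abi == "32" || abi == "o32" || abi == "eabi") return "mips32";
  if (abi == "n32" || abi == "n64" || abi == "64")  return "mips64";
  return "";
}
static std::string abiForCPU(const std::string &cpu) {
  if (cpu == "mips32" || cpu == "mips32r2") return "o32";
  if (cpu == "mips64" || cpu == "mips64r2") return "n64";
  return "";
}

int main() {
  std::cout << cpuForABI("n64") << '\n';      // mips64
  std::cout << abiForCPU("mips32r2") << '\n'; // o32
}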
// Convert ABI name to the GNU tools acceptable variant.
static StringRef getGnuCompatibleMipsABIName(StringRef ABI) {
return llvm::StringSwitch<llvm::StringRef>(ABI)
.Case("o32", "32")
.Case("n64", "64")
.Default(ABI);
}
// Select the MIPS float ABI as determined by -msoft-float, -mhard-float,
// and -mfloat-abi=.
static StringRef getMipsFloatABI(const Driver &D, const ArgList &Args) {
StringRef FloatABI;
if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
options::OPT_mhard_float,
options::OPT_mfloat_abi_EQ)) {
if (A->getOption().matches(options::OPT_msoft_float))
FloatABI = "soft";
else if (A->getOption().matches(options::OPT_mhard_float))
FloatABI = "hard";
else {
FloatABI = A->getValue();
if (FloatABI != "soft" && FloatABI != "hard") {
D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
FloatABI = "hard";
}
}
}
// If unspecified, choose the default based on the platform.
if (FloatABI.empty()) {
// Assume "hard", because it's a default value used by gcc.
// When we start to recognize specific target MIPS processors,
// we will be able to select the default more correctly.
FloatABI = "hard";
}
return FloatABI;
}
static void AddTargetFeature(const ArgList &Args,
std::vector<const char *> &Features,
OptSpecifier OnOpt, OptSpecifier OffOpt,
StringRef FeatureName) {
if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) {
if (A->getOption().matches(OnOpt))
Features.push_back(Args.MakeArgString("+" + FeatureName));
else
Features.push_back(Args.MakeArgString("-" + FeatureName));
}
}
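AddTargetFeature is the generic last-flag-wins toggle reused by the MIPS and PPC feature code below: whichever of the on/off options appears last pushes '+name' or '-name', and silence leaves the backend default untouched. A standalone sketch over plain string argv, using the real -mdsp/-mno-dsp spelling as sample input:

#include <iostream>
#include <string>
#include <vector>

// Last occurrence of onOpt/offOpt wins; push "+name" or "-name" accordingly,
// push nothing if neither flag is present (the backend default stands).
static void addTargetFeature(const std::vector<std::string> &args,
                             const std::string &onOpt, const std::string &offOpt,
                             const std::string &name,
                             std::vector<std::string> &features) {
  int last = -1;
  bool on = false;
  for (int i = 0; i < static_cast<int>(args.size()); ++i) {
    if (args[i] == onOpt)  { last = i; on = true;  }
    if (args[i] == offOpt) { last = i; on = false; }
  }
  if (last >= 0)
    features.push_back((on ? "+" : "-") + name);
}

int main() {
  std::vector<std::string> features;
  addTargetFeature({"-mdsp", "-mno-dsp", "-mdsp"}, "-mdsp", "-mno-dsp", "dsp",
                   features);
  std::cout << features.front() << '\n'; // +dsp (last flag wins)
}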
static void getMIPSTargetFeatures(const Driver &D, const ArgList &Args,
std::vector<const char *> &Features) {
StringRef FloatABI = getMipsFloatABI(D, Args);
bool IsMips16 = Args.getLastArg(options::OPT_mips16) != NULL;
if (FloatABI == "soft" || (FloatABI == "hard" && IsMips16)) {
// FIXME: Note, this is a hack. We need to pass the selected float
// mode to the MipsTargetInfoBase to define appropriate macros there.
// Now it is the only method.
Features.push_back("+soft-float");
}
if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
if (StringRef(A->getValue()) == "2008")
Features.push_back("+nan2008");
}
AddTargetFeature(Args, Features, options::OPT_msingle_float,
options::OPT_mdouble_float, "single-float");
AddTargetFeature(Args, Features, options::OPT_mips16, options::OPT_mno_mips16,
"mips16");
AddTargetFeature(Args, Features, options::OPT_mmicromips,
options::OPT_mno_micromips, "micromips");
AddTargetFeature(Args, Features, options::OPT_mdsp, options::OPT_mno_dsp,
"dsp");
AddTargetFeature(Args, Features, options::OPT_mdspr2, options::OPT_mno_dspr2,
"dspr2");
AddTargetFeature(Args, Features, options::OPT_mmsa, options::OPT_mno_msa,
"msa");
AddTargetFeature(Args, Features, options::OPT_mfp64, options::OPT_mfp32,
"fp64");
}
void Clang::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
StringRef FloatABI = getMipsFloatABI(D, Args);
bool IsMips16 = Args.getLastArg(options::OPT_mips16) != NULL;
if (FloatABI == "soft" || (FloatABI == "hard" && IsMips16)) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
if (FloatABI == "hard" && IsMips16) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mips16-hard-float");
}
}
else {
// Floating point operations and argument passing are hard.
assert(FloatABI == "hard" && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (Arg *A = Args.getLastArg(options::OPT_mxgot, options::OPT_mno_xgot)) {
if (A->getOption().matches(options::OPT_mxgot)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mxgot");
}
}
if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1,
options::OPT_mno_ldc1_sdc1)) {
if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-ldc1-sdc1");
}
}
if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division,
options::OPT_mno_check_zero_division)) {
if (A->getOption().matches(options::OPT_mno_check_zero_division)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-check-zero-division");
}
}
if (Arg *A = Args.getLastArg(options::OPT_G)) {
StringRef v = A->getValue();
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v));
A->claim();
}
}
/// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting.
static std::string getPPCTargetCPU(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPUName = A->getValue();
if (CPUName == "native") {
std::string CPU = llvm::sys::getHostCPUName();
if (!CPU.empty() && CPU != "generic")
return CPU;
else
return "";
}
return llvm::StringSwitch<const char *>(CPUName)
.Case("common", "generic")
.Case("440", "440")
.Case("440fp", "440")
.Case("450", "450")
.Case("601", "601")
.Case("602", "602")
.Case("603", "603")
.Case("603e", "603e")
.Case("603ev", "603ev")
.Case("604", "604")
.Case("604e", "604e")
.Case("620", "620")
.Case("630", "pwr3")
.Case("G3", "g3")
.Case("7400", "7400")
.Case("G4", "g4")
.Case("7450", "7450")
.Case("G4+", "g4+")
.Case("750", "750")
.Case("970", "970")
.Case("G5", "g5")
.Case("a2", "a2")
.Case("a2q", "a2q")
.Case("e500mc", "e500mc")
.Case("e5500", "e5500")
.Case("power3", "pwr3")
.Case("power4", "pwr4")
.Case("power5", "pwr5")
.Case("power5x", "pwr5x")
.Case("power6", "pwr6")
.Case("power6x", "pwr6x")
.Case("power7", "pwr7")
.Case("pwr3", "pwr3")
.Case("pwr4", "pwr4")
.Case("pwr5", "pwr5")
.Case("pwr5x", "pwr5x")
.Case("pwr6", "pwr6")
.Case("pwr6x", "pwr6x")
.Case("pwr7", "pwr7")
.Case("powerpc", "ppc")
.Case("powerpc64", "ppc64")
.Case("powerpc64le", "ppc64le")
.Default("");
}
return "";
}
static void getPPCTargetFeatures(const ArgList &Args,
std::vector<const char *> &Features) {
for (arg_iterator it = Args.filtered_begin(options::OPT_m_ppc_Features_Group),
ie = Args.filtered_end();
it != ie; ++it) {
StringRef Name = (*it)->getOption().getName();
(*it)->claim();
// Skip over "-m".
assert(Name.startswith("m") && "Invalid feature name.");
Name = Name.substr(1);
bool IsNegative = Name.startswith("no-");
if (IsNegative)
Name = Name.substr(3);
// Note that gcc calls this mfcrf and LLVM calls this mfocrf so we
// pass the correct option to the backend while calling the frontend
// option the same.
// TODO: Change the LLVM backend option maybe?
if (Name == "mfcrf")
Name = "mfocrf";
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
// Altivec is a bit weird, allow overriding of the Altivec feature here.
AddTargetFeature(Args, Features, options::OPT_faltivec,
options::OPT_fno_altivec, "altivec");
}
/// Get the (LLVM) name of the R600 gpu we are targeting.
static std::string getR600TargetGPU(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
const char *GPUName = A->getValue();
return llvm::StringSwitch<const char *>(GPUName)
.Cases("rv630", "rv635", "r600")
.Cases("rv610", "rv620", "rs780", "rs880")
.Case("rv740", "rv770")
.Case("palm", "cedar")
.Cases("sumo", "sumo2", "sumo")
.Case("hemlock", "cypress")
.Case("aruba", "cayman")
.Default(GPUName);
}
return "";
}
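// e.g. "-mcpu=rv630" maps to "r600" and "-mcpu=hemlock" to "cypress",
// while a name with no alias (such as "cayman") is passed through
// unchanged by the Default case.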
static void getSparcTargetFeatures(const ArgList &Args,
std::vector<const char *> &Features) {
bool SoftFloatABI = true;
if (Arg *A =
Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) {
if (A->getOption().matches(options::OPT_mhard_float))
SoftFloatABI = false;
}
if (SoftFloatABI)
Features.push_back("+soft-float");
}
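// Note that soft float is the default for this helper: only an explicit
// -mhard-float (when it is the last of the two flags) suppresses the
// "+soft-float" feature.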
void Clang::AddSparcTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
// Select the float ABI as determined by -msoft-float and -mhard-float.
StringRef FloatABI;
if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
options::OPT_mhard_float)) {
if (A->getOption().matches(options::OPT_msoft_float))
FloatABI = "soft";
else if (A->getOption().matches(options::OPT_mhard_float))
FloatABI = "hard";
}
// If unspecified, choose the default based on the platform.
if (FloatABI.empty()) {
// Assume "soft", but warn the user we are guessing.
FloatABI = "soft";
D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft";
}
if (FloatABI == "soft") {
// Floating point operations and argument passing are soft.
//
// FIXME: This changes CPP defines, we need -target-soft-float.
CmdArgs.push_back("-msoft-float");
} else {
assert(FloatABI == "hard" && "Invalid float abi!");
CmdArgs.push_back("-mhard-float");
}
}
static const char *getSystemZTargetCPU(const ArgList &Args) {
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
return A->getValue();
return "z10";
}
static const char *getX86TargetCPU(const ArgList &Args,
const llvm::Triple &Triple) {
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
if (StringRef(A->getValue()) != "native") {
if (Triple.isOSDarwin() && Triple.getArchName() == "x86_64h")
return "core-avx2";
return A->getValue();
}
// FIXME: Reject attempts to use -march=native unless the target matches
// the host.
//
// FIXME: We should also incorporate the detected target features for use
// with -march=native.
std::string CPU = llvm::sys::getHostCPUName();
if (!CPU.empty() && CPU != "generic")
return Args.MakeArgString(CPU);
}
// Select the default CPU if none was given (or detection failed).
if (Triple.getArch() != llvm::Triple::x86_64 &&
Triple.getArch() != llvm::Triple::x86)
return 0; // This routine is only handling x86 targets.
bool Is64Bit = Triple.getArch() == llvm::Triple::x86_64;
// FIXME: Need target hooks.
if (Triple.isOSDarwin()) {
if (Triple.getArchName() == "x86_64h")
return "core-avx2";
return Is64Bit ? "core2" : "yonah";
}
// All x86 devices running Android have core2 as their common
// denominator. This makes it a better choice than pentium4.
if (Triple.getEnvironment() == llvm::Triple::Android)
return "core2";
// Everything else goes to x86-64 in 64-bit mode.
if (Is64Bit)
return "x86-64";
switch (Triple.getOS()) {
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
case llvm::Triple::OpenBSD:
return "i486";
case llvm::Triple::Haiku:
return "i586";
case llvm::Triple::Bitrig:
return "i686";
default:
// Fallback to p4.
return "pentium4";
}
}
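// Rough examples of the defaulting above: x86_64-unknown-freebsd gets
// "x86-64", i386-unknown-freebsd gets "i486", and Android targets get
// "core2" whether 32- or 64-bit.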
static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) {
switch(T.getArch()) {
default:
return "";
case llvm::Triple::aarch64:
return getAArch64TargetCPU(Args, T);
case llvm::Triple::arm:
case llvm::Triple::thumb:
return getARMTargetCPU(Args, T);
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
getMipsCPUAndABI(Args, T, CPUName, ABIName);
return CPUName;
}
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le: {
std::string TargetCPUName = getPPCTargetCPU(Args);
// LLVM may default to generating code for the native CPU,
// but, like gcc, we default to a more generic option for
// each architecture (except on Darwin).
if (TargetCPUName.empty() && !T.isOSDarwin()) {
if (T.getArch() == llvm::Triple::ppc64)
TargetCPUName = "ppc64";
else if (T.getArch() == llvm::Triple::ppc64le)
TargetCPUName = "ppc64le";
else
TargetCPUName = "ppc";
}
return TargetCPUName;
}
case llvm::Triple::sparc:
case llvm::Triple::sparcv9:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
return "";
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return getX86TargetCPU(Args, T);
case llvm::Triple::hexagon:
return "hexagon" + toolchains::Hexagon_TC::GetTargetCPU(Args).str();
case llvm::Triple::systemz:
return getSystemZTargetCPU(Args);
case llvm::Triple::r600:
return getR600TargetGPU(Args);
}
}
static void getX86TargetFeatures(const llvm::Triple &Triple,
const ArgList &Args,
std::vector<const char *> &Features) {
if (Triple.getArchName() == "x86_64h") {
// x86_64h implies quite a few of the more modern subtarget features
// for Haswell class CPUs, but not all of them. Opt-out of a few.
Features.push_back("-rdrnd");
Features.push_back("-aes");
Features.push_back("-pclmul");
Features.push_back("-rtm");
Features.push_back("-hle");
Features.push_back("-fsgsbase");
}
// Now add any that the user explicitly requested on the command line,
// which may override the defaults.
for (arg_iterator it = Args.filtered_begin(options::OPT_m_x86_Features_Group),
ie = Args.filtered_end();
it != ie; ++it) {
StringRef Name = (*it)->getOption().getName();
(*it)->claim();
// Skip over "-m".
assert(Name.startswith("m") && "Invalid feature name.");
Name = Name.substr(1);
bool IsNegative = Name.startswith("no-");
if (IsNegative)
Name = Name.substr(3);
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
}
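// For example, "-mavx2 -mno-avx2" contributes "+avx2" followed by "-avx2";
// the last-wins filtering in getTargetFeatures() below then keeps only
// "-avx2".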
void Clang::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (!Args.hasFlag(options::OPT_mred_zone,
options::OPT_mno_red_zone,
true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
// Default to avoid implicit floating-point for kernel/kext code, but allow
// that to be overridden with -mno-soft-float.
bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext));
if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
options::OPT_mno_soft_float,
options::OPT_mimplicit_float,
options::OPT_mno_implicit_float)) {
const Option &O = A->getOption();
NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) ||
O.matches(options::OPT_msoft_float));
}
if (NoImplicitFloat)
CmdArgs.push_back("-no-implicit-float");
}
static inline bool HasPICArg(const ArgList &Args) {
return Args.hasArg(options::OPT_fPIC)
|| Args.hasArg(options::OPT_fpic);
}
static Arg *GetLastSmallDataThresholdArg(const ArgList &Args) {
return Args.getLastArg(options::OPT_G,
options::OPT_G_EQ,
options::OPT_msmall_data_threshold_EQ);
}
static std::string GetHexagonSmallDataThresholdValue(const ArgList &Args) {
std::string value;
if (HasPICArg(Args))
value = "0";
else if (Arg *A = GetLastSmallDataThresholdArg(Args)) {
value = A->getValue();
A->claim();
}
return value;
}
void Clang::AddHexagonTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-fno-signed-char");
CmdArgs.push_back("-mqdsp6-compat");
CmdArgs.push_back("-Wreturn-type");
std::string SmallDataThreshold = GetHexagonSmallDataThresholdValue(Args);
if (!SmallDataThreshold.empty()) {
CmdArgs.push_back ("-mllvm");
CmdArgs.push_back(Args.MakeArgString(
"-hexagon-small-data-threshold=" + SmallDataThreshold));
}
if (!Args.hasArg(options::OPT_fno_short_enums))
CmdArgs.push_back("-fshort-enums");
if (Args.getLastArg(options::OPT_mieee_rnd_near)) {
CmdArgs.push_back ("-mllvm");
CmdArgs.push_back ("-enable-hexagon-ieee-rnd-near");
}
CmdArgs.push_back ("-mllvm");
CmdArgs.push_back ("-machine-sink-split=0");
}
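// e.g. "-G 8" with no PIC flags yields "-mllvm
// -hexagon-small-data-threshold=8" above, while any -fpic/-fPIC pins the
// threshold to 0.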
static void getAArch64TargetFeatures(const Driver &D, const ArgList &Args,
std::vector<const char *> &Features) {
// Honor -mfpu=.
if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ))
getAArch64FPUFeatures(D, A, Args, Features);
}
static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args, ArgStringList &CmdArgs) {
std::vector<const char *> Features;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
getMIPSTargetFeatures(D, Args, Features);
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
getARMTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
getPPCTargetFeatures(Args, Features);
break;
case llvm::Triple::sparc:
getSparcTargetFeatures(Args, Features);
break;
case llvm::Triple::aarch64:
getAArch64TargetFeatures(D, Args, Features);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
getX86TargetFeatures(Triple, Args, Features);
break;
}
// Find the last of each feature.
llvm::StringMap<unsigned> LastOpt;
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
const char *Name = Features[I];
assert(Name[0] == '-' || Name[0] == '+');
LastOpt[Name + 1] = I;
}
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
// If this feature was overridden, ignore it.
const char *Name = Features[I];
llvm::StringMap<unsigned>::iterator LastI = LastOpt.find(Name + 1);
assert(LastI != LastOpt.end());
unsigned Last = LastI->second;
if (Last != I)
continue;
CmdArgs.push_back("-target-feature");
CmdArgs.push_back(Name);
}
}
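// Illustration of the last-wins filtering above: a feature list of
// {"+sse4.2", "-avx", "+avx"} emits "-target-feature +sse4.2" and
// "-target-feature +avx"; the earlier "-avx" is dropped because a later
// entry names the same feature.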
static bool
shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
const llvm::Triple &Triple) {
// We use the zero-cost exception tables for Objective-C if the non-fragile
// ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and
// later.
if (runtime.isNonFragile())
return true;
if (!Triple.isOSDarwin())
return false;
return (!Triple.isMacOSXVersionLT(10,5) &&
(Triple.getArch() == llvm::Triple::x86_64 ||
Triple.getArch() == llvm::Triple::arm));
}
/// addExceptionArgs - Adds exception-related arguments to the driver command
/// arguments. There is a master flag, -fexceptions, and also language-specific
/// flags to enable or disable C++ and Objective-C exceptions. This makes it
/// possible, for example, to disable C++ exceptions but enable Objective-C
/// exceptions.
static void addExceptionArgs(const ArgList &Args, types::ID InputType,
const llvm::Triple &Triple,
bool KernelOrKext,
const ObjCRuntime &objcRuntime,
ArgStringList &CmdArgs) {
if (KernelOrKext) {
// -mkernel and -fapple-kext imply no exceptions, so claim exception-related
// arguments now to avoid warnings about unused arguments.
Args.ClaimAllArgs(options::OPT_fexceptions);
Args.ClaimAllArgs(options::OPT_fno_exceptions);
Args.ClaimAllArgs(options::OPT_fobjc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_exceptions);
Args.ClaimAllArgs(options::OPT_fcxx_exceptions);
Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions);
return;
}
// Exceptions are enabled by default.
bool ExceptionsEnabled = true;
// This keeps track of whether exceptions were explicitly turned on or off.
bool DidHaveExplicitExceptionFlag = false;
if (Arg *A = Args.getLastArg(options::OPT_fexceptions,
options::OPT_fno_exceptions)) {
if (A->getOption().matches(options::OPT_fexceptions))
ExceptionsEnabled = true;
else
ExceptionsEnabled = false;
DidHaveExplicitExceptionFlag = true;
}
bool ShouldUseExceptionTables = false;
// Exception tables and cleanups can be enabled with -fexceptions even if the
// language itself doesn't support exceptions.
if (ExceptionsEnabled && DidHaveExplicitExceptionFlag)
ShouldUseExceptionTables = true;
// Obj-C exceptions are enabled by default, regardless of -fexceptions. This
// is not necessarily sensible, but follows GCC.
if (types::isObjC(InputType) &&
Args.hasFlag(options::OPT_fobjc_exceptions,
options::OPT_fno_objc_exceptions,
true)) {
CmdArgs.push_back("-fobjc-exceptions");
ShouldUseExceptionTables |=
shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple);
}
if (types::isCXX(InputType)) {
bool CXXExceptionsEnabled = ExceptionsEnabled;
if (Arg *A = Args.getLastArg(options::OPT_fcxx_exceptions,
options::OPT_fno_cxx_exceptions,
options::OPT_fexceptions,
options::OPT_fno_exceptions)) {
if (A->getOption().matches(options::OPT_fcxx_exceptions))
CXXExceptionsEnabled = true;
else if (A->getOption().matches(options::OPT_fno_cxx_exceptions))
CXXExceptionsEnabled = false;
}
if (CXXExceptionsEnabled) {
CmdArgs.push_back("-fcxx-exceptions");
ShouldUseExceptionTables = true;
}
}
if (ShouldUseExceptionTables)
CmdArgs.push_back("-fexceptions");
}
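// Worked example: for a C++ input, "-fno-exceptions -fcxx-exceptions"
// still emits "-fcxx-exceptions" (the more specific flag comes last) and
// therefore "-fexceptions" for the tables, while a bare "-fno-exceptions"
// disables both.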
static bool ShouldDisableAutolink(const ArgList &Args,
const ToolChain &TC) {
bool Default = true;
if (TC.getTriple().isOSDarwin()) {
// The native darwin assembler doesn't support the linker_option directives,
// so we disable them if we think the .s file will be passed to it.
Default = TC.useIntegratedAs();
}
return !Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink,
Default);
}
static bool ShouldDisableCFI(const ArgList &Args,
const ToolChain &TC) {
bool Default = true;
if (TC.getTriple().isOSDarwin()) {
// The native darwin assembler doesn't support cfi directives, so
// we disable them if we think the .s file will be passed to it.
Default = TC.useIntegratedAs();
}
return !Args.hasFlag(options::OPT_fdwarf2_cfi_asm,
options::OPT_fno_dwarf2_cfi_asm,
Default);
}
static bool ShouldDisableDwarfDirectory(const ArgList &Args,
const ToolChain &TC) {
bool UseDwarfDirectory = Args.hasFlag(options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
TC.useIntegratedAs());
return !UseDwarfDirectory;
}
/// \brief Check whether the given input tree contains any compilation actions.
static bool ContainsCompileAction(const Action *A) {
if (isa<CompileJobAction>(A))
return true;
for (Action::const_iterator it = A->begin(), ie = A->end(); it != ie; ++it)
if (ContainsCompileAction(*it))
return true;
return false;
}
/// \brief Check if -relax-all should be passed to the internal assembler.
/// This is done by default when compiling non-assembler source with -O0.
static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
bool RelaxDefault = true;
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
RelaxDefault = A->getOption().matches(options::OPT_O0);
if (RelaxDefault) {
RelaxDefault = false;
for (ActionList::const_iterator it = C.getActions().begin(),
ie = C.getActions().end(); it != ie; ++it) {
if (ContainsCompileAction(*it)) {
RelaxDefault = true;
break;
}
}
}
return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all,
RelaxDefault);
}
static void CollectArgsForIntegratedAssembler(Compilation &C,
const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D) {
if (UseRelaxAll(C, Args))
CmdArgs.push_back("-mrelax-all");
// When passing -I arguments to the assembler we sometimes need to
// unconditionally take the next argument. For example, when parsing
// '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
// -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
// arg after parsing the '-I' arg.
bool TakeNextArg = false;
// When using an integrated assembler, translate -Wa, and -Xassembler
// options.
for (arg_iterator it = Args.filtered_begin(options::OPT_Wa_COMMA,
options::OPT_Xassembler),
ie = Args.filtered_end(); it != ie; ++it) {
const Arg *A = *it;
A->claim();
for (unsigned i = 0, e = A->getNumValues(); i != e; ++i) {
StringRef Value = A->getValue(i);
if (TakeNextArg) {
CmdArgs.push_back(Value.data());
TakeNextArg = false;
continue;
}
if (Value == "-force_cpusubtype_ALL") {
// Do nothing, this is the default and we don't support anything else.
} else if (Value == "-L") {
CmdArgs.push_back("-msave-temp-labels");
} else if (Value == "--fatal-warnings") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-fatal-assembler-warnings");
} else if (Value == "--noexecstack") {
CmdArgs.push_back("-mnoexecstack");
} else if (Value.startswith("-I")) {
CmdArgs.push_back(Value.data());
// We need to consume the next argument if the current arg is a plain
// -I. The next arg will be the include directory.
if (Value == "-I")
TakeNextArg = true;
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
}
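// e.g. "-Wa,-I,/usr/local/include" and "-Wa,-I -Wa,/usr/local/include"
// both forward "-I" "/usr/local/include" to the integrated assembler,
// while an unrecognized -Wa, value is rejected with a driver error.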
static void addProfileRTLinux(
const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) {
if (!(Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_fprofile_generate) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_coverage)))
return;
// The profile runtime is located in the Linux library directory and has the
// name "libclang_rt.profile-<ArchName>.a".
SmallString<128> LibProfile(TC.getDriver().ResourceDir);
llvm::sys::path::append(
LibProfile, "lib", "linux",
Twine("libclang_rt.profile-") + TC.getArchName() + ".a");
CmdArgs.push_back(Args.MakeArgString(LibProfile));
}
static void addSanitizerRTLinkFlagsLinux(
const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs,
const StringRef Sanitizer, bool BeforeLibStdCXX,
bool ExportSymbols = true) {
// The sanitizer runtime is located in the Linux library directory and
// has the name "libclang_rt.<Sanitizer>-<ArchName>.a".
SmallString<128> LibSanitizer(TC.getDriver().ResourceDir);
llvm::sys::path::append(
LibSanitizer, "lib", "linux",
(Twine("libclang_rt.") + Sanitizer + "-" + TC.getArchName() + ".a"));
// Sanitizer runtime may need to come before -lstdc++ (or -lc++, libstdc++.a,
// etc.) so that the linker picks custom versions of the global 'operator
// new' and 'operator delete' symbols. We take the extreme (but simple)
// strategy of inserting it at the front of the link command. It also
// needs to be forced to end up in the executable, so wrap it in
// whole-archive.
SmallVector<const char *, 3> LibSanitizerArgs;
LibSanitizerArgs.push_back("-whole-archive");
LibSanitizerArgs.push_back(Args.MakeArgString(LibSanitizer));
LibSanitizerArgs.push_back("-no-whole-archive");
CmdArgs.insert(BeforeLibStdCXX ? CmdArgs.begin() : CmdArgs.end(),
LibSanitizerArgs.begin(), LibSanitizerArgs.end());
CmdArgs.push_back("-lpthread");
CmdArgs.push_back("-lrt");
CmdArgs.push_back("-ldl");
CmdArgs.push_back("-lm");
// If possible, use a dynamic symbols file to export the symbols from the
// runtime library. If we can't do so, use -export-dynamic instead to export
// all symbols from the binary.
if (ExportSymbols) {
if (llvm::sys::fs::exists(LibSanitizer + ".syms"))
CmdArgs.push_back(
Args.MakeArgString("--dynamic-list=" + LibSanitizer + ".syms"));
else
CmdArgs.push_back("-export-dynamic");
}
}
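// Rough shape of the additions for Sanitizer == "asan" on x86_64 (paths
// abbreviated): "-whole-archive .../libclang_rt.asan-x86_64.a
// -no-whole-archive" spliced in (at the front when BeforeLibStdCXX), then
// "-lpthread -lrt -ldl -lm" appended, plus either
// "--dynamic-list=<lib>.syms" or "-export-dynamic".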
/// If AddressSanitizer is enabled, add appropriate linker flags (Linux).
/// This needs to be called before we add the C run-time (malloc, etc).
static void addAsanRTLinux(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
if (TC.getTriple().getEnvironment() == llvm::Triple::Android) {
SmallString<128> LibAsan(TC.getDriver().ResourceDir);
llvm::sys::path::append(LibAsan, "lib", "linux",
(Twine("libclang_rt.asan-") +
TC.getArchName() + "-android.so"));
CmdArgs.insert(CmdArgs.begin(), Args.MakeArgString(LibAsan));
} else {
if (!Args.hasArg(options::OPT_shared))
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "asan", true);
}
}
/// If ThreadSanitizer is enabled, add appropriate linker flags (Linux).
/// This needs to be called before we add the C run-time (malloc, etc).
static void addTsanRTLinux(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
if (!Args.hasArg(options::OPT_shared))
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "tsan", true);
}
/// If MemorySanitizer is enabled, add appropriate linker flags (Linux).
/// This needs to be called before we add the C run-time (malloc, etc).
static void addMsanRTLinux(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
if (!Args.hasArg(options::OPT_shared))
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "msan", true);
}
/// If LeakSanitizer is enabled, add appropriate linker flags (Linux).
/// This needs to be called before we add the C run-time (malloc, etc).
static void addLsanRTLinux(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
if (!Args.hasArg(options::OPT_shared))
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "lsan", true);
}
/// If UndefinedBehaviorSanitizer is enabled, add appropriate linker flags
/// (Linux).
static void addUbsanRTLinux(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs, bool IsCXX,
bool HasOtherSanitizerRt) {
// Need a copy of sanitizer_common. This could come from another sanitizer
// runtime; if we're not including one, include our own copy.
if (!HasOtherSanitizerRt)
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "san", true, false);
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "ubsan", false);
// Only include the bits of the runtime which need a C++ ABI library if
// we're linking in C++ mode.
if (IsCXX)
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "ubsan_cxx", false);
}
static void addDfsanRTLinux(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
if (!Args.hasArg(options::OPT_shared))
addSanitizerRTLinkFlagsLinux(TC, Args, CmdArgs, "dfsan", true);
}
static bool shouldUseFramePointerForTarget(const ArgList &Args,
const llvm::Triple &Triple) {
switch (Triple.getArch()) {
// Don't use a frame pointer on Linux if optimizing for certain targets.
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::systemz:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
if (Triple.isOSLinux())
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
if (!A->getOption().matches(options::OPT_O0))
return false;
return true;
case llvm::Triple::xcore:
return false;
default:
return true;
}
}
static bool shouldUseFramePointer(const ArgList &Args,
const llvm::Triple &Triple) {
if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer,
options::OPT_fomit_frame_pointer))
return A->getOption().matches(options::OPT_fno_omit_frame_pointer);
return shouldUseFramePointerForTarget(Args, Triple);
}
static bool shouldUseLeafFramePointer(const ArgList &Args,
const llvm::Triple &Triple) {
if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer,
options::OPT_momit_leaf_frame_pointer))
return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer);
return shouldUseFramePointerForTarget(Args, Triple);
}
/// Add a CC1 option to specify the debug compilation directory.
static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) {
SmallString<128> cwd;
if (!llvm::sys::fs::current_path(cwd)) {
CmdArgs.push_back("-fdebug-compilation-dir");
CmdArgs.push_back(Args.MakeArgString(cwd));
}
}
static const char *SplitDebugName(const ArgList &Args,
const InputInfoList &Inputs) {
Arg *FinalOutput = Args.getLastArg(options::OPT_o);
if (FinalOutput && Args.hasArg(options::OPT_c)) {
SmallString<128> T(FinalOutput->getValue());
llvm::sys::path::replace_extension(T, "dwo");
return Args.MakeArgString(T);
} else {
// Use the compilation dir.
SmallString<128> T(Args.getLastArgValue(options::OPT_fdebug_compilation_dir));
SmallString<128> F(llvm::sys::path::stem(Inputs[0].getBaseInput()));
llvm::sys::path::replace_extension(F, "dwo");
T += F;
return Args.MakeArgString(T);
}
}
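// e.g. "clang -c -gsplit-dwarf foo.c -o build/foo.o" names the split debug
// file "build/foo.dwo"; without "-o" (or "-c") the name comes from the
// input's stem, prefixed by the -fdebug-compilation-dir value if one was
// given.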
static void SplitDebugInfo(const ToolChain &TC, Compilation &C,
const Tool &T, const JobAction &JA,
const ArgList &Args, const InputInfo &Output,
const char *OutFile) {
ArgStringList ExtractArgs;
ExtractArgs.push_back("--extract-dwo");
ArgStringList StripArgs;
StripArgs.push_back("--strip-dwo");
// Grab the output of the earlier compile step.
StripArgs.push_back(Output.getFilename());
ExtractArgs.push_back(Output.getFilename());
ExtractArgs.push_back(OutFile);
const char *Exec =
Args.MakeArgString(TC.GetProgramPath("objcopy"));
// First extract the dwo sections.
C.addCommand(new Command(JA, T, Exec, ExtractArgs));
// Then remove them from the original .o file.
C.addCommand(new Command(JA, T, Exec, StripArgs));
}
-static bool isOptimizationLevelFast(const ArgList &Args) {
- if (Arg *A = Args.getLastArg(options::OPT_O_Group))
- if (A->getOption().matches(options::OPT_Ofast))
- return true;
- return false;
-}
-
/// \brief Vectorize at all optimization levels greater than 1 except for -Oz.
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
return true;
if (A->getOption().matches(options::OPT_O0))
return false;
assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
// Vectorize -Os.
StringRef S(A->getValue());
if (S == "s")
return true;
// Don't vectorize -Oz.
if (S == "z")
return false;
unsigned OptLevel = 0;
if (S.getAsInteger(10, OptLevel))
return false;
return OptLevel > 1;
}
return false;
}
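// In short: -O2, -O3, -O4, -Ofast and -Os enable the vectorizer; -O0, -O1
// and -Oz leave it off, as does any -O value that fails to parse.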
void Clang::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
bool KernelOrKext = Args.hasArg(options::OPT_mkernel,
options::OPT_fapple_kext);
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
// Invoke ourselves in -cc1 mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
CmdArgs.push_back(Args.MakeArgString(TripleStr));
// Select the appropriate action.
RewriteKind rewriteKind = RK_None;
if (isa<AnalyzeJobAction>(JA)) {
assert(JA.getType() == types::TY_Plist && "Invalid output type.");
CmdArgs.push_back("-analyze");
} else if (isa<MigrateJobAction>(JA)) {
CmdArgs.push_back("-migrate");
} else if (isa<PreprocessJobAction>(JA)) {
if (Output.getType() == types::TY_Dependencies)
CmdArgs.push_back("-Eonly");
else {
CmdArgs.push_back("-E");
if (Args.hasArg(options::OPT_rewrite_objc) &&
!Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-P");
}
} else if (isa<AssembleJobAction>(JA)) {
CmdArgs.push_back("-emit-obj");
CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D);
// Also ignore explicit -force_cpusubtype_ALL option.
(void) Args.hasArg(options::OPT_force__cpusubtype__ALL);
} else if (isa<PrecompileJobAction>(JA)) {
// Use PCH if the user requested it.
bool UsePCH = D.CCCUsePCH;
if (JA.getType() == types::TY_Nothing)
CmdArgs.push_back("-fsyntax-only");
else if (UsePCH)
CmdArgs.push_back("-emit-pch");
else
CmdArgs.push_back("-emit-pth");
} else {
assert(isa<CompileJobAction>(JA) && "Invalid action for clang tool.");
if (JA.getType() == types::TY_Nothing) {
CmdArgs.push_back("-fsyntax-only");
} else if (JA.getType() == types::TY_LLVM_IR ||
JA.getType() == types::TY_LTO_IR) {
CmdArgs.push_back("-emit-llvm");
} else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
CmdArgs.push_back("-emit-llvm-bc");
} else if (JA.getType() == types::TY_PP_Asm) {
CmdArgs.push_back("-S");
} else if (JA.getType() == types::TY_AST) {
CmdArgs.push_back("-emit-pch");
} else if (JA.getType() == types::TY_ModuleFile) {
CmdArgs.push_back("-module-file-info");
} else if (JA.getType() == types::TY_RewrittenObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_NonFragile;
} else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_Fragile;
} else {
assert(JA.getType() == types::TY_PP_Asm &&
"Unexpected output type!");
}
}
// The make clang go fast button.
CmdArgs.push_back("-disable-free");
// Disable the verification pass in -asserts builds.
#ifdef NDEBUG
CmdArgs.push_back("-disable-llvm-verifier");
#endif
// Set the main file name, so that debug info works even with
// -save-temps.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(getBaseInputName(Args, Inputs));
// Some flags which affect the language (via preprocessor
// defines).
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("-static-define");
if (isa<AnalyzeJobAction>(JA)) {
// Enable region store model by default.
CmdArgs.push_back("-analyzer-store=region");
// Treat blocks as analysis entry points.
CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks");
CmdArgs.push_back("-analyzer-eagerly-assume");
// Add default argument set.
if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) {
CmdArgs.push_back("-analyzer-checker=core");
if (getToolChain().getTriple().getOS() != llvm::Triple::Win32)
CmdArgs.push_back("-analyzer-checker=unix");
if (getToolChain().getTriple().getVendor() == llvm::Triple::Apple)
CmdArgs.push_back("-analyzer-checker=osx");
CmdArgs.push_back("-analyzer-checker=deadcode");
if (types::isCXX(Inputs[0].getType()))
CmdArgs.push_back("-analyzer-checker=cplusplus");
// Enable the following experimental checkers for testing.
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.UncheckedReturn");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork");
}
// Set the output format. The default is plist, for (lame) historical
// reasons.
CmdArgs.push_back("-analyzer-output");
if (Arg *A = Args.getLastArg(options::OPT__analyzer_output))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("plist");
// Disable the presentation of standard compiler warnings when
// using --analyze. We only want to show static analyzer diagnostics
// or frontend errors.
CmdArgs.push_back("-w");
// Add -Xanalyzer arguments when running as analyzer.
Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer);
}
CheckCodeGenerationOptions(D, Args);
bool PIE = getToolChain().isPIEDefault();
bool PIC = PIE || getToolChain().isPICDefault();
bool IsPICLevelTwo = PIC;
// For the PIC and PIE flag options, this logic is different from the
// legacy logic in very old versions of GCC, as that logic was just
// a bug no one had ever fixed. This logic is both more rational and
// consistent with GCC's new logic now that the bugs are fixed. The last
// argument relating to either PIC or PIE wins, and no other argument is
// used. If the last argument is any flavor of the '-fno-...' arguments,
// both PIC and PIE are disabled. Any PIE option implicitly enables PIC
// at the same level.
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
// Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness
// is forced, then neither the PIC nor the PIE flags will have any effect.
if (!getToolChain().isPICDefaultForced()) {
if (LastPICArg) {
Option O = LastPICArg->getOption();
if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) ||
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) {
PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie);
PIC = PIE || O.matches(options::OPT_fPIC) ||
O.matches(options::OPT_fpic);
IsPICLevelTwo = O.matches(options::OPT_fPIE) ||
O.matches(options::OPT_fPIC);
} else {
PIE = PIC = false;
}
}
}
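// Worked example (absent toolchain defaults): "-fPIC -fno-pic" leaves both
// PIC and PIE disabled because the last PIC/PIE-related flag wins, while
// "-fpie" alone enables level-1 PIE and, implicitly, level-1 PIC.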
// Introduce a Darwin-specific hack. If the default is PIC, but the flags
// that enabled PIC only requested level 1 PIC, just force it back to level
// 2 PIC instead. This matches the behavior of Darwin GCC (based on my
// informal testing).
if (PIC && getToolChain().getTriple().isOSDarwin())
IsPICLevelTwo |= getToolChain().isPICDefault();
// Note that these flags are trump-cards. Regardless of the order w.r.t. the
// PIC or PIE options above, if these show up, PIC is disabled.
llvm::Triple Triple(TripleStr);
if (KernelOrKext &&
(!Triple.isiOS() || Triple.isOSVersionLT(6)))
PIC = PIE = false;
if (Args.hasArg(options::OPT_static))
PIC = PIE = false;
if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) {
// This is a very special mode. It trumps the other modes, almost no one
// uses it, and it isn't even valid on any OS but Darwin.
if (!getToolChain().getTriple().isOSDarwin())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << getToolChain().getTriple().str();
// FIXME: Warn when this flag trumps some other PIC or PIE flag.
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back("dynamic-no-pic");
// Only a forced PIC mode can cause the actual compile to have PIC defines
// etc., no flags are sufficient. This behavior was selected to closely
// match that of llvm-gcc and Apple GCC before that.
if (getToolChain().isPICDefault() && getToolChain().isPICDefaultForced()) {
CmdArgs.push_back("-pic-level");
CmdArgs.push_back("2");
}
} else {
// Currently, LLVM only knows about PIC vs. static; the PIE differences are
// handled in Clang's IRGen by the -pie-level flag.
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(PIC ? "pic" : "static");
if (PIC) {
CmdArgs.push_back("-pic-level");
CmdArgs.push_back(IsPICLevelTwo ? "2" : "1");
if (PIE) {
CmdArgs.push_back("-pie-level");
CmdArgs.push_back(IsPICLevelTwo ? "2" : "1");
}
}
}
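// e.g. "-fPIE" (when the toolchain does not force PIC-ness) yields
// "-mrelocation-model pic -pic-level 2 -pie-level 2" on the cc1 command
// line.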
if (!Args.hasFlag(options::OPT_fmerge_all_constants,
options::OPT_fno_merge_all_constants))
CmdArgs.push_back("-fno-merge-all-constants");
// LLVM Code Generator Options.
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
CmdArgs.push_back("-mregparm");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return,
options::OPT_freg_struct_return)) {
if (getToolChain().getArch() != llvm::Triple::x86) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << getToolChain().getTriple().str();
} else if (A->getOption().matches(options::OPT_fpcc_struct_return)) {
CmdArgs.push_back("-fpcc-struct-return");
} else {
assert(A->getOption().matches(options::OPT_freg_struct_return));
CmdArgs.push_back("-freg-struct-return");
}
}
if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false))
CmdArgs.push_back("-mrtd");
if (shouldUseFramePointer(Args, getToolChain().getTriple()))
CmdArgs.push_back("-mdisable-fp-elim");
if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss,
options::OPT_fno_zero_initialized_in_bss))
CmdArgs.push_back("-mno-zero-initialized-in-bss");
bool OFastEnabled = isOptimizationLevelFast(Args);
// If -Ofast is the optimization level, then -fstrict-aliasing should be
// enabled. This alias option is being used to simplify the hasFlag logic.
OptSpecifier StrictAliasingAliasOption = OFastEnabled ? options::OPT_Ofast :
options::OPT_fstrict_aliasing;
if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption,
options::OPT_fno_strict_aliasing, true))
CmdArgs.push_back("-relaxed-aliasing");
if (!Args.hasFlag(options::OPT_fstruct_path_tbaa,
options::OPT_fno_struct_path_tbaa))
CmdArgs.push_back("-no-struct-path-tbaa");
if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums,
false))
CmdArgs.push_back("-fstrict-enums");
if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls))
CmdArgs.push_back("-mdisable-tail-calls");
// Handle segmented stacks.
if (Args.hasArg(options::OPT_fsplit_stack))
CmdArgs.push_back("-split-stacks");
// If -Ofast is the optimization level, then -ffast-math should be enabled.
// This alias option is being used to simplify the getLastArg logic.
OptSpecifier FastMathAliasOption = OFastEnabled ? options::OPT_Ofast :
options::OPT_ffast_math;
// Handle various floating point optimization flags, mapping them to the
// appropriate LLVM code generation flags. The pattern for all of these is to
// default off the codegen optimizations, and if any flag enables them and no
// flag disables them after the flag enabling them, enable the codegen
// optimization. This is complicated by several "umbrella" flags.
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_ffinite_math_only,
options::OPT_fno_finite_math_only,
options::OPT_fhonor_infinities,
options::OPT_fno_honor_infinities))
if (A->getOption().getID() != options::OPT_fno_fast_math &&
A->getOption().getID() != options::OPT_fno_finite_math_only &&
A->getOption().getID() != options::OPT_fhonor_infinities)
CmdArgs.push_back("-menable-no-infs");
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_ffinite_math_only,
options::OPT_fno_finite_math_only,
options::OPT_fhonor_nans,
options::OPT_fno_honor_nans))
if (A->getOption().getID() != options::OPT_fno_fast_math &&
A->getOption().getID() != options::OPT_fno_finite_math_only &&
A->getOption().getID() != options::OPT_fhonor_nans)
CmdArgs.push_back("-menable-no-nans");
// -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
bool MathErrno = getToolChain().IsMathErrnoDefault();
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_fmath_errno,
options::OPT_fno_math_errno)) {
// Turning on -ffast-math (with either flag) removes the need for MathErrno.
// However, turning *off* -ffast-math merely restores the toolchain default
// (which may be false).
if (A->getOption().getID() == options::OPT_fno_math_errno ||
A->getOption().getID() == options::OPT_ffast_math ||
A->getOption().getID() == options::OPT_Ofast)
MathErrno = false;
else if (A->getOption().getID() == options::OPT_fmath_errno)
MathErrno = true;
}
if (MathErrno)
CmdArgs.push_back("-fmath-errno");
// There are several flags which require disabling very specific
// optimizations. Any of these being disabled forces us to turn off the
// entire set of LLVM optimizations, so collect them through all the flag
// madness.
bool AssociativeMath = false;
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_funsafe_math_optimizations,
options::OPT_fno_unsafe_math_optimizations,
options::OPT_fassociative_math,
options::OPT_fno_associative_math))
if (A->getOption().getID() != options::OPT_fno_fast_math &&
A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
A->getOption().getID() != options::OPT_fno_associative_math)
AssociativeMath = true;
bool ReciprocalMath = false;
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_funsafe_math_optimizations,
options::OPT_fno_unsafe_math_optimizations,
options::OPT_freciprocal_math,
options::OPT_fno_reciprocal_math))
if (A->getOption().getID() != options::OPT_fno_fast_math &&
A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
A->getOption().getID() != options::OPT_fno_reciprocal_math)
ReciprocalMath = true;
bool SignedZeros = true;
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_funsafe_math_optimizations,
options::OPT_fno_unsafe_math_optimizations,
options::OPT_fsigned_zeros,
options::OPT_fno_signed_zeros))
if (A->getOption().getID() != options::OPT_fno_fast_math &&
A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
A->getOption().getID() != options::OPT_fsigned_zeros)
SignedZeros = false;
bool TrappingMath = true;
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_funsafe_math_optimizations,
options::OPT_fno_unsafe_math_optimizations,
options::OPT_ftrapping_math,
options::OPT_fno_trapping_math))
if (A->getOption().getID() != options::OPT_fno_fast_math &&
A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
A->getOption().getID() != options::OPT_ftrapping_math)
TrappingMath = false;
if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
!TrappingMath)
CmdArgs.push_back("-menable-unsafe-fp-math");
// Validate and pass through -fp-contract option.
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math,
options::OPT_ffp_contract)) {
if (A->getOption().getID() == options::OPT_ffp_contract) {
StringRef Val = A->getValue();
if (Val == "fast" || Val == "on" || Val == "off") {
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
}
} else if (A->getOption().matches(options::OPT_ffast_math) ||
(OFastEnabled && A->getOption().matches(options::OPT_Ofast))) {
// If fast-math is set then set the fp-contract mode to fast.
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
}
}
// We separately look for the '-ffast-math' and '-ffinite-math-only' flags,
// and if we find them, tell the frontend to provide the appropriate
// preprocessor macros. This is distinct from enabling any optimizations as
// these options induce language changes which must survive serialization
// and deserialization, etc.
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
options::OPT_fno_fast_math))
if (!A->getOption().matches(options::OPT_fno_fast_math))
CmdArgs.push_back("-ffast-math");
if (Arg *A = Args.getLastArg(options::OPT_ffinite_math_only, options::OPT_fno_fast_math))
if (A->getOption().matches(options::OPT_ffinite_math_only))
CmdArgs.push_back("-ffinite-math-only");
// Decide whether to use verbose asm. Verbose assembly is the default on
// toolchains which have the integrated assembler on by default.
bool IsVerboseAsmDefault = getToolChain().IsIntegratedAssemblerDefault();
if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm,
IsVerboseAsmDefault) ||
Args.hasArg(options::OPT_dA))
CmdArgs.push_back("-masm-verbose");
if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Structure");
}
if (Args.hasArg(options::OPT_fdebug_pass_arguments)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Arguments");
}
// Enable -mconstructor-aliases except on darwin, where we have to
// work around a linker bug; see <rdar://problem/7651567>.
if (!getToolChain().getTriple().isOSDarwin())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
// try to use them.
if (KernelOrKext && getToolChain().getTriple().isOSDarwin())
CmdArgs.push_back("-fforbid-guard-variables");
if (Args.hasArg(options::OPT_mms_bitfields)) {
CmdArgs.push_back("-mms-bitfields");
}
// This is a coarse approximation of what llvm-gcc actually does, both
// -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more
// complicated ways.
bool AsynchronousUnwindTables =
Args.hasFlag(options::OPT_fasynchronous_unwind_tables,
options::OPT_fno_asynchronous_unwind_tables,
getToolChain().IsUnwindTablesDefault() &&
!KernelOrKext);
if (Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables,
AsynchronousUnwindTables))
CmdArgs.push_back("-munwind-tables");
getToolChain().addClangTargetOptions(Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
CmdArgs.push_back("-mlimit-float-precision");
CmdArgs.push_back(A->getValue());
}
// FIXME: Handle -mtune=.
(void) Args.hasArg(options::OPT_mtune_EQ);
if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
CmdArgs.push_back("-mcode-model");
CmdArgs.push_back(A->getValue());
}
// Add the target cpu
std::string ETripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
llvm::Triple ETriple(ETripleStr);
std::string CPU = getCPUName(Args, ETriple);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) {
CmdArgs.push_back("-mfpmath");
CmdArgs.push_back(A->getValue());
}
// Add the target features
getTargetFeatures(D, ETriple, Args, CmdArgs);
// Add target specific flags.
switch(getToolChain().getArch()) {
default:
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
AddARMTargetArgs(Args, CmdArgs, KernelOrKext);
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::sparc:
AddSparcTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
}
// Add clang-cl arguments.
if (getToolChain().getDriver().IsCLMode())
AddClangCLArgs(Args, CmdArgs);
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
CmdArgs.push_back("-target-linker-version");
CmdArgs.push_back(A->getValue());
}
if (!shouldUseLeafFramePointer(Args, getToolChain().getTriple()))
CmdArgs.push_back("-momit-leaf-frame-pointer");
// Explicitly error on some things we know we don't support and can't just
// ignore.
types::ID InputType = Inputs[0].getType();
if (!Args.hasArg(options::OPT_fallow_unsupported)) {
Arg *Unsupported;
if (types::isCXX(InputType) &&
getToolChain().getTriple().isOSDarwin() &&
getToolChain().getArch() == llvm::Triple::x86) {
if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) ||
(Unsupported = Args.getLastArg(options::OPT_mkernel)))
D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386)
<< Unsupported->getOption().getName();
}
}
Args.AddAllArgs(CmdArgs, options::OPT_v);
Args.AddLastArg(CmdArgs, options::OPT_H);
if (D.CCPrintHeaders && !D.CCGenDiagnostics) {
CmdArgs.push_back("-header-include-file");
CmdArgs.push_back(D.CCPrintHeadersFilename ?
D.CCPrintHeadersFilename : "-");
}
Args.AddLastArg(CmdArgs, options::OPT_P);
Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout);
if (D.CCLogDiagnostics && !D.CCGenDiagnostics) {
CmdArgs.push_back("-diagnostic-log-file");
CmdArgs.push_back(D.CCLogDiagnosticsFilename ?
D.CCLogDiagnosticsFilename : "-");
}
// Use the last option from "-g" group. "-gline-tables-only"
// is preserved, all other debug options are substituted with "-g".
Args.ClaimAllArgs(options::OPT_g_Group);
if (Arg *A = Args.getLastArg(options::OPT_g_Group)) {
if (A->getOption().matches(options::OPT_gline_tables_only))
CmdArgs.push_back("-gline-tables-only");
else if (A->getOption().matches(options::OPT_gdwarf_2))
CmdArgs.push_back("-gdwarf-2");
else if (A->getOption().matches(options::OPT_gdwarf_3))
CmdArgs.push_back("-gdwarf-3");
else if (A->getOption().matches(options::OPT_gdwarf_4))
CmdArgs.push_back("-gdwarf-4");
else if (!A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_ggdb0)) {
// Default is dwarf-2 for Darwin and FreeBSD.
const llvm::Triple &Triple = getToolChain().getTriple();
if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::FreeBSD)
CmdArgs.push_back("-gdwarf-2");
else
CmdArgs.push_back("-g");
}
}
// We ignore flags -gstrict-dwarf and -grecord-gcc-switches for now.
Args.ClaimAllArgs(options::OPT_g_flags_Group);
if (Args.hasArg(options::OPT_gcolumn_info))
CmdArgs.push_back("-dwarf-column-info");
// FIXME: Move backend command line options to the module.
// -gsplit-dwarf should turn on -g and enable the backend dwarf
// splitting and extraction.
// FIXME: Currently only works on Linux.
if (getToolChain().getTriple().isOSLinux() &&
Args.hasArg(options::OPT_gsplit_dwarf)) {
CmdArgs.push_back("-g");
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-split-dwarf=Enable");
}
// -ggnu-pubnames turns on gnu style pubnames in the backend.
if (Args.hasArg(options::OPT_ggnu_pubnames)) {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-generate-gnu-dwarf-pub-sections");
}
Args.AddAllArgs(CmdArgs, options::OPT_fdebug_types_section);
Args.AddAllArgs(CmdArgs, options::OPT_ffunction_sections);
Args.AddAllArgs(CmdArgs, options::OPT_fdata_sections);
Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions);
if (Args.hasArg(options::OPT_ftest_coverage) ||
Args.hasArg(options::OPT_coverage))
CmdArgs.push_back("-femit-coverage-notes");
if (Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_coverage))
CmdArgs.push_back("-femit-coverage-data");
if (C.getArgs().hasArg(options::OPT_c) ||
C.getArgs().hasArg(options::OPT_S)) {
if (Output.isFilename()) {
CmdArgs.push_back("-coverage-file");
SmallString<128> CoverageFilename(Output.getFilename());
if (llvm::sys::path::is_relative(CoverageFilename.str())) {
SmallString<128> Pwd;
if (!llvm::sys::fs::current_path(Pwd)) {
llvm::sys::path::append(Pwd, CoverageFilename.str());
CoverageFilename.swap(Pwd);
}
}
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
}
}
// Pass options for controlling the default header search paths.
if (Args.hasArg(options::OPT_nostdinc)) {
CmdArgs.push_back("-nostdsysteminc");
CmdArgs.push_back("-nobuiltininc");
} else {
if (Args.hasArg(options::OPT_nostdlibinc))
CmdArgs.push_back("-nostdsysteminc");
Args.AddLastArg(CmdArgs, options::OPT_nostdincxx);
Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc);
}
// Pass the path to compiler resource files.
CmdArgs.push_back("-resource-dir");
CmdArgs.push_back(D.ResourceDir.c_str());
Args.AddLastArg(CmdArgs, options::OPT_working_directory);
bool ARCMTEnabled = false;
if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) {
if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_modify,
options::OPT_ccc_arcmt_migrate)) {
ARCMTEnabled = true;
switch (A->getOption().getID()) {
default:
llvm_unreachable("missed a case");
case options::OPT_ccc_arcmt_check:
CmdArgs.push_back("-arcmt-check");
break;
case options::OPT_ccc_arcmt_modify:
CmdArgs.push_back("-arcmt-modify");
break;
case options::OPT_ccc_arcmt_migrate:
CmdArgs.push_back("-arcmt-migrate");
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output);
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors);
break;
}
}
} else {
Args.ClaimAllArgs(options::OPT_ccc_arcmt_check);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate);
}
if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) {
if (ARCMTEnabled) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-ccc-arcmt-migrate";
}
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
if (!Args.hasArg(options::OPT_objcmt_migrate_literals,
options::OPT_objcmt_migrate_subscripting,
options::OPT_objcmt_migrate_property)) {
// None specified, means enable them all.
CmdArgs.push_back("-objcmt-migrate-literals");
CmdArgs.push_back("-objcmt-migrate-subscripting");
CmdArgs.push_back("-objcmt-migrate-property");
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_white_list_dir_path);
}
// Add preprocessing options like -I, -D, etc. if we are using the
// preprocessor.
//
// FIXME: Support -fpreprocessed
if (types::getPreprocessedType(InputType) != types::TY_INVALID)
AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs);
// Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes
// that "The compiler can only warn and ignore the option if not recognized".
// When building with ccache, it will pass -D options to clang even on
// preprocessed inputs and configure concludes that -fPIC is not supported.
Args.ClaimAllArgs(options::OPT_D);
// Manually translate -O4 to -O3; let clang reject others.
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Don't warn about unused -flto. This can happen when we're preprocessing or
// precompiling.
Args.ClaimAllArgs(options::OPT_flto);
Args.AddAllArgs(CmdArgs, options::OPT_W_Group);
if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false))
CmdArgs.push_back("-pedantic");
Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors);
Args.AddLastArg(CmdArgs, options::OPT_w);
// Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi}
// (-ansi is equivalent to -std=c89 or -std=c++98).
//
// If a std is supplied, only add -trigraphs if it follows the
// option.
if (Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi)) {
if (Std->getOption().matches(options::OPT_ansi))
if (types::isCXX(InputType))
CmdArgs.push_back("-std=c++98");
else
CmdArgs.push_back("-std=c89");
else
Std->render(Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi,
options::OPT_trigraphs))
if (A != Std)
A->render(Args, CmdArgs);
} else {
// Honor -std-default.
//
// FIXME: Clang doesn't correctly handle -std= when the input language
// doesn't match. For the time being just ignore this for C++ inputs;
// eventually we want to do all the standard defaulting here instead of
// splitting it between the driver and clang -cc1.
if (!types::isCXX(InputType))
Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ,
"-std=", /*Joined=*/true);
else if (getToolChain().getTriple().getOS() == llvm::Triple::Win32)
CmdArgs.push_back("-std=c++11");
Args.AddLastArg(CmdArgs, options::OPT_trigraphs);
}
// GCC's behavior for -Wwrite-strings is a bit strange:
// * In C, this "warning flag" changes the types of string literals from
// 'char[N]' to 'const char[N]', and thus triggers an unrelated warning
// for the discarded qualifier.
// * In C++, this is just a normal warning flag.
//
// Implementing this warning correctly in C is hard, so we follow GCC's
// behavior for now. FIXME: Directly diagnose uses of a string literal as
// a non-const char* in C, rather than using this crude hack.
if (!types::isCXX(InputType)) {
DiagnosticsEngine::Level DiagLevel = D.getDiags().getDiagnosticLevel(
diag::warn_deprecated_string_literal_conversion_c, SourceLocation());
if (DiagLevel > DiagnosticsEngine::Ignored)
CmdArgs.push_back("-fconst-strings");
}
// GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active
// during C++ compilation, which it is by default. GCC keeps this define even
// in the presence of '-w', match this behavior bug-for-bug.
if (types::isCXX(InputType) &&
Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated,
true)) {
CmdArgs.push_back("-fdeprecated-macro");
}
// Translate GCC's misnamed '-fasm' flag to '-fgnu-keywords'.
if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) {
if (Asm->getOption().matches(options::OPT_fasm))
CmdArgs.push_back("-fgnu-keywords");
else
CmdArgs.push_back("-fno-gnu-keywords");
}
if (ShouldDisableCFI(Args, getToolChain()))
CmdArgs.push_back("-fno-dwarf2-cfi-asm");
if (ShouldDisableDwarfDirectory(Args, getToolChain()))
CmdArgs.push_back("-fno-dwarf-directory-asm");
if (ShouldDisableAutolink(Args, getToolChain()))
CmdArgs.push_back("-fno-autolink");
// Add in -fdebug-compilation-dir if necessary.
addDebugCompDirArg(Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_,
options::OPT_ftemplate_depth_EQ)) {
CmdArgs.push_back("-ftemplate-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) {
CmdArgs.push_back("-foperator-arrow-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) {
CmdArgs.push_back("-fconstexpr-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) {
CmdArgs.push_back("-fconstexpr-steps");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) {
CmdArgs.push_back("-fbracket-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ,
options::OPT_Wlarge_by_value_copy_def)) {
if (A->getNumValues()) {
StringRef bytes = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes));
} else
CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value
}
if (Args.hasArg(options::OPT_relocatable_pch))
CmdArgs.push_back("-relocatable-pch");
if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) {
CmdArgs.push_back("-fconstant-string-class");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) {
CmdArgs.push_back("-ftabstop");
CmdArgs.push_back(A->getValue());
}
CmdArgs.push_back("-ferror-limit");
if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("19");
if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) {
CmdArgs.push_back("-fmacro-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) {
CmdArgs.push_back("-ftemplate-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) {
CmdArgs.push_back("-fconstexpr-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
// Pass -fmessage-length=.
CmdArgs.push_back("-fmessage-length");
if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) {
CmdArgs.push_back(A->getValue());
} else {
// If -fmessage-length=N was not specified, determine whether this is a
// terminal and, if so, implicitly define -fmessage-length appropriately.
unsigned N = llvm::sys::Process::StandardErrColumns();
CmdArgs.push_back(Args.MakeArgString(Twine(N)));
}
// -fvisibility= and -fvisibility-ms-compat are of a piece.
if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
if (A->getOption().matches(options::OPT_fvisibility_EQ)) {
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back(A->getValue());
} else {
assert(A->getOption().matches(options::OPT_fvisibility_ms_compat));
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back("hidden");
CmdArgs.push_back("-ftype-visibility");
CmdArgs.push_back("default");
}
}
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden);
Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
// -fhosted is default.
if (Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) ||
KernelOrKext)
CmdArgs.push_back("-ffreestanding");
// Forward -f (flag) options which we can pass directly.
Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
Args.AddLastArg(CmdArgs, options::OPT_fformat_extensions);
Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
Args.AddLastArg(CmdArgs, options::OPT_fstandalone_debug);
Args.AddLastArg(CmdArgs, options::OPT_fno_standalone_debug);
Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names);
// AltiVec language extensions aren't relevant for assembling.
if (!isa<PreprocessJobAction>(JA) ||
Output.getType() != types::TY_PP_Asm)
Args.AddLastArg(CmdArgs, options::OPT_faltivec);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
Sanitize.addArgs(Args, CmdArgs);
if (!Args.hasFlag(options::OPT_fsanitize_recover,
options::OPT_fno_sanitize_recover,
true))
CmdArgs.push_back("-fno-sanitize-recover");
if (Args.hasArg(options::OPT_fcatch_undefined_behavior) ||
Args.hasFlag(options::OPT_fsanitize_undefined_trap_on_error,
options::OPT_fno_sanitize_undefined_trap_on_error, false))
CmdArgs.push_back("-fsanitize-undefined-trap-on-error");
// Report an error for -faltivec on anything other than PowerPC.
if (const Arg *A = Args.getLastArg(options::OPT_faltivec))
if (!(getToolChain().getArch() == llvm::Triple::ppc ||
getToolChain().getArch() == llvm::Triple::ppc64 ||
getToolChain().getArch() == llvm::Triple::ppc64le))
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getAsString(Args) << "ppc/ppc64/ppc64le";
if (getToolChain().SupportsProfiling())
Args.AddLastArg(CmdArgs, options::OPT_pg);
// -flax-vector-conversions is default.
if (!Args.hasFlag(options::OPT_flax_vector_conversions,
options::OPT_fno_lax_vector_conversions))
CmdArgs.push_back("-fno-lax-vector-conversions");
if (Args.getLastArg(options::OPT_fapple_kext))
CmdArgs.push_back("-fapple-kext");
if (Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false))
CmdArgs.push_back("-frewrite-includes");
Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report);
Args.AddLastArg(CmdArgs, options::OPT_ftrapv);
if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {
CmdArgs.push_back("-ftrapv-handler");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ);
// -fno-strict-overflow implies -fwrapv if it isn't disabled, but
// -fstrict-overflow won't turn off an explicitly enabled -fwrapv.
if (Arg *A = Args.getLastArg(options::OPT_fwrapv,
options::OPT_fno_wrapv)) {
if (A->getOption().matches(options::OPT_fwrapv))
CmdArgs.push_back("-fwrapv");
} else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow,
options::OPT_fno_strict_overflow)) {
if (A->getOption().matches(options::OPT_fno_strict_overflow))
CmdArgs.push_back("-fwrapv");
}
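// Worked examples of the interaction above (hypothetical command lines):
//   -fno-strict-overflow             -> "-fwrapv" is passed
//   -fwrapv -fno-wrapv               -> nothing is passed (last flag wins)
//   -fno-wrapv -fno-strict-overflow  -> nothing is passed; an explicit
//                                       -f(no-)wrapv takes precedence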
if (Arg *A = Args.getLastArg(options::OPT_freroll_loops,
options::OPT_fno_reroll_loops))
if (A->getOption().matches(options::OPT_freroll_loops))
CmdArgs.push_back("-freroll-loops");
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_pthread);
// -stack-protector=0 is default.
unsigned StackProtectorLevel = 0;
if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
options::OPT_fstack_protector_all,
options::OPT_fstack_protector)) {
if (A->getOption().matches(options::OPT_fstack_protector))
StackProtectorLevel = 1;
else if (A->getOption().matches(options::OPT_fstack_protector_all))
StackProtectorLevel = 2;
} else {
StackProtectorLevel =
getToolChain().GetDefaultStackProtectorLevel(KernelOrKext);
}
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel)));
}
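// For example (hypothetical flags, per the mapping above):
//   -fstack-protector     -> "-stack-protector" "1"
//   -fstack-protector-all -> "-stack-protector" "2"
//   -fno-stack-protector  -> no -stack-protector argument is emitted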
// --param ssp-buffer-size=
for (arg_iterator it = Args.filtered_begin(options::OPT__param),
ie = Args.filtered_end(); it != ie; ++it) {
StringRef Str((*it)->getValue());
if (Str.startswith("ssp-buffer-size=")) {
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector-buffer-size");
// FIXME: Verify the argument is a valid integer.
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16)));
}
(*it)->claim();
}
}
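// Example (assumed invocation): "--param ssp-buffer-size=4" together with an
// enabled stack protector adds "-stack-protector-buffer-size" "4"; with the
// stack protector off, the argument is claimed but otherwise ignored.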
// Translate -mstackrealign
if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign,
false)) {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-force-align-stack");
}
if (!Args.hasFlag(options::OPT_mno_stackrealign, options::OPT_mstackrealign,
false)) {
CmdArgs.push_back(Args.MakeArgString("-mstackrealign"));
}
if (Args.hasArg(options::OPT_mstack_alignment)) {
StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment);
CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment));
}
// -mkernel implies -mstrict-align; don't add the redundant option.
if (!KernelOrKext) {
if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
options::OPT_munaligned_access)) {
if (A->getOption().matches(options::OPT_mno_unaligned_access)) {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-strict-align");
} else {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-no-strict-align");
}
}
}
if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
options::OPT_mno_restrict_it)) {
if (A->getOption().matches(options::OPT_mrestrict_it)) {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-restrict-it");
} else {
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-arm-no-restrict-it");
}
}
// Forward -f options with positive and negative forms; we translate
// these by hand.
if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) {
StringRef fname = A->getValue();
if (!llvm::sys::fs::exists(fname))
D.Diag(diag::err_drv_no_such_file) << fname;
else
A->render(Args, CmdArgs);
}
if (Args.hasArg(options::OPT_mkernel)) {
if (!Args.hasArg(options::OPT_fapple_kext) && types::isCXX(InputType))
CmdArgs.push_back("-fapple-kext");
if (!Args.hasArg(options::OPT_fbuiltin))
CmdArgs.push_back("-fno-builtin");
Args.ClaimAllArgs(options::OPT_fno_builtin);
}
// -fbuiltin is default.
else if (!Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin))
CmdArgs.push_back("-fno-builtin");
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new))
CmdArgs.push_back("-fno-assume-sane-operator-new");
// -fblocks=0 is default.
if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks,
getToolChain().IsBlocksDefault()) ||
(Args.hasArg(options::OPT_fgnu_runtime) &&
Args.hasArg(options::OPT_fobjc_nonfragile_abi) &&
!Args.hasArg(options::OPT_fno_blocks))) {
CmdArgs.push_back("-fblocks");
if (!Args.hasArg(options::OPT_fgnu_runtime) &&
!getToolChain().hasBlocksRuntime())
CmdArgs.push_back("-fblocks-runtime-optional");
}
// -fmodules enables modules (off by default). However, for C++/Objective-C++,
// users must also pass -fcxx-modules. The latter flag will disappear once the
// modules implementation is solid for C++/Objective-C++ programs as well.
bool HaveModules = false;
if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) {
bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules,
options::OPT_fno_cxx_modules,
false);
if (AllowedInCXX || !types::isCXX(InputType)) {
CmdArgs.push_back("-fmodules");
HaveModules = true;
}
}
// -fmodule-maps enables module map processing (off by default) for header
// checking. It is implied by -fmodules.
if (Args.hasFlag(options::OPT_fmodule_maps, options::OPT_fno_module_maps,
false)) {
CmdArgs.push_back("-fmodule-maps");
}
// -fmodules-decluse checks that modules used are declared so (off by
// default).
if (Args.hasFlag(options::OPT_fmodules_decluse,
options::OPT_fno_modules_decluse,
false)) {
CmdArgs.push_back("-fmodules-decluse");
}
// -fmodule-name specifies the module that is currently being built (or
// used for header checking by -fmodule-maps).
if (Arg *A = Args.getLastArg(options::OPT_fmodule_name)) {
A->claim();
A->render(Args, CmdArgs);
}
// -fmodule-map-file can be used to specify a file containing module
// definitions.
if (Arg *A = Args.getLastArg(options::OPT_fmodule_map_file)) {
A->claim();
A->render(Args, CmdArgs);
}
// If a module path was provided, pass it along. Otherwise, use a temporary
// directory.
if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path)) {
A->claim();
if (HaveModules) {
A->render(Args, CmdArgs);
}
} else if (HaveModules) {
SmallString<128> DefaultModuleCache;
llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false,
DefaultModuleCache);
llvm::sys::path::append(DefaultModuleCache, "org.llvm.clang");
llvm::sys::path::append(DefaultModuleCache, "ModuleCache");
const char Arg[] = "-fmodules-cache-path=";
DefaultModuleCache.insert(DefaultModuleCache.begin(),
Arg, Arg + strlen(Arg));
CmdArgs.push_back(Args.MakeArgString(DefaultModuleCache));
}
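// Sketch of the resulting default (assuming a Unix-style temp directory; the
// actual prefix comes from system_temp_directory()): "-fmodules" with no
// explicit cache path typically yields
//   "-fmodules-cache-path=/tmp/org.llvm.clang/ModuleCache"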
// Pass through all -fmodules-ignore-macro arguments.
Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after);
// -faccess-control is default.
if (Args.hasFlag(options::OPT_fno_access_control,
options::OPT_faccess_control,
false))
CmdArgs.push_back("-fno-access-control");
// -felide-constructors is the default.
if (Args.hasFlag(options::OPT_fno_elide_constructors,
options::OPT_felide_constructors,
false))
CmdArgs.push_back("-fno-elide-constructors");
// -frtti is default.
if (!Args.hasFlag(options::OPT_frtti, options::OPT_fno_rtti) ||
KernelOrKext) {
CmdArgs.push_back("-fno-rtti");
// -fno-rtti cannot usefully be combined with -fsanitize=vptr.
if (Sanitize.sanitizesVptr()) {
std::string NoRttiArg =
Args.getLastArg(options::OPT_mkernel,
options::OPT_fapple_kext,
options::OPT_fno_rtti)->getAsString(Args);
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-fsanitize=vptr" << NoRttiArg;
}
}
// -fshort-enums=0 is default for all architectures except Hexagon.
if (Args.hasFlag(options::OPT_fshort_enums,
options::OPT_fno_short_enums,
getToolChain().getArch() ==
llvm::Triple::hexagon))
CmdArgs.push_back("-fshort-enums");
// -fsigned-char is default.
if (!Args.hasFlag(options::OPT_fsigned_char, options::OPT_funsigned_char,
isSignedCharDefault(getToolChain().getTriple())))
CmdArgs.push_back("-fno-signed-char");
// -fthreadsafe-static is default.
if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics))
CmdArgs.push_back("-fno-threadsafe-statics");
// -fuse-cxa-atexit is default.
if (!Args.hasFlag(
options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit,
getToolChain().getTriple().getOS() != llvm::Triple::Cygwin &&
getToolChain().getTriple().getOS() != llvm::Triple::MinGW32 &&
getToolChain().getArch() != llvm::Triple::hexagon &&
getToolChain().getArch() != llvm::Triple::xcore) ||
KernelOrKext)
CmdArgs.push_back("-fno-use-cxa-atexit");
// -fms-extensions=0 is default.
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
getToolChain().getTriple().getOS() == llvm::Triple::Win32))
CmdArgs.push_back("-fms-extensions");
// -fms-compatibility=0 is default.
if (Args.hasFlag(options::OPT_fms_compatibility,
options::OPT_fno_ms_compatibility,
(getToolChain().getTriple().getOS() == llvm::Triple::Win32 &&
Args.hasFlag(options::OPT_fms_extensions,
options::OPT_fno_ms_extensions,
true))))
CmdArgs.push_back("-fms-compatibility");
// -fmsc-version=1700 is default.
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
getToolChain().getTriple().getOS() == llvm::Triple::Win32) ||
Args.hasArg(options::OPT_fmsc_version)) {
StringRef msc_ver = Args.getLastArgValue(options::OPT_fmsc_version);
if (msc_ver.empty())
CmdArgs.push_back("-fmsc-version=1700");
else
CmdArgs.push_back(Args.MakeArgString("-fmsc-version=" + msc_ver));
}
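// e.g. (hypothetical): on a Win32 triple, plain "-fms-extensions" implies
// "-fmsc-version=1700", while "-fmsc-version=1600" is forwarded as
// "-fmsc-version=1600".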
// -fno-borland-extensions is default.
if (Args.hasFlag(options::OPT_fborland_extensions,
options::OPT_fno_borland_extensions, false))
CmdArgs.push_back("-fborland-extensions");
// -fno-delayed-template-parsing is default, except for Windows where MSVC STL
// needs it.
if (Args.hasFlag(options::OPT_fdelayed_template_parsing,
options::OPT_fno_delayed_template_parsing,
getToolChain().getTriple().getOS() == llvm::Triple::Win32))
CmdArgs.push_back("-fdelayed-template-parsing");
// -fgnu-keywords default varies depending on language; only pass if
// specified.
if (Arg *A = Args.getLastArg(options::OPT_fgnu_keywords,
options::OPT_fno_gnu_keywords))
A->render(Args, CmdArgs);
if (Args.hasFlag(options::OPT_fgnu89_inline,
options::OPT_fno_gnu89_inline,
false))
CmdArgs.push_back("-fgnu89-inline");
if (Args.hasArg(options::OPT_fno_inline))
CmdArgs.push_back("-fno-inline");
if (Args.hasArg(options::OPT_fno_inline_functions))
CmdArgs.push_back("-fno-inline-functions");
ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind);
// -fobjc-dispatch-method is only relevant with the non-fragile ABI, and
// legacy is the default. The NeXT runtime is always legacy dispatch, and
// -fno-objc-legacy-dispatch gets ignored silently.
if (objcRuntime.isNonFragile() && !objcRuntime.isNeXTFamily()) {
if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch,
options::OPT_fno_objc_legacy_dispatch,
objcRuntime.isLegacyDispatchDefaultForArch(
getToolChain().getArch()))) {
if (getToolChain().UseObjCMixedDispatch())
CmdArgs.push_back("-fobjc-dispatch-method=mixed");
else
CmdArgs.push_back("-fobjc-dispatch-method=non-legacy");
}
}
// When the Objective-C legacy runtime is in effect on Mac OS X, turn on
// Array/Dictionary subscripting by default.
if (getToolChain().getTriple().getArch() == llvm::Triple::x86 &&
getToolChain().getTriple().isMacOSX() &&
!getToolChain().getTriple().isMacOSXVersionLT(10, 7) &&
objcRuntime.getKind() == ObjCRuntime::FragileMacOSX &&
objcRuntime.isNeXTFamily())
CmdArgs.push_back("-fobjc-subscripting-legacy-runtime");
// -fencode-extended-block-signature=1 is default.
if (getToolChain().IsEncodeExtendedBlockSignatureDefault()) {
CmdArgs.push_back("-fencode-extended-block-signature");
}
// Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc.
// NOTE: This logic is duplicated in ToolChains.cpp.
bool ARC = isObjCAutoRefCount(Args);
if (ARC) {
getToolChain().CheckObjCARC();
CmdArgs.push_back("-fobjc-arc");
// FIXME: It seems like this entire block, and several around it, should be
// wrapped in isObjC, but for now we just use it here as this is where it
// was being used previously.
if (types::isCXX(InputType) && types::isObjC(InputType)) {
if (getToolChain().GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
CmdArgs.push_back("-fobjc-arc-cxxlib=libc++");
else
CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++");
}
// Allow the user to enable full exceptions code emission.
// We default to off for Objective-C, on for Objective-C++.
if (Args.hasFlag(options::OPT_fobjc_arc_exceptions,
options::OPT_fno_objc_arc_exceptions,
/*default*/ types::isCXX(InputType)))
CmdArgs.push_back("-fobjc-arc-exceptions");
}
// -fobjc-infer-related-result-type is the default, except in the Objective-C
// rewriter.
if (rewriteKind != RK_None)
CmdArgs.push_back("-fno-objc-infer-related-result-type");
// Handle -fobjc-gc and -fobjc-gc-only. They are exclusive, and -fobjc-gc-only
// takes precedence.
const Arg *GCArg = Args.getLastArg(options::OPT_fobjc_gc_only);
if (!GCArg)
GCArg = Args.getLastArg(options::OPT_fobjc_gc);
if (GCArg) {
if (ARC) {
D.Diag(diag::err_drv_objc_gc_arr)
<< GCArg->getAsString(Args);
} else if (getToolChain().SupportsObjCGC()) {
GCArg->render(Args, CmdArgs);
} else {
// FIXME: We should move this to a hard error.
D.Diag(diag::warn_drv_objc_gc_unsupported)
<< GCArg->getAsString(Args);
}
}
// Add exception args.
addExceptionArgs(Args, InputType, getToolChain().getTriple(),
KernelOrKext, objcRuntime, CmdArgs);
if (getToolChain().UseSjLjExceptions())
CmdArgs.push_back("-fsjlj-exceptions");
// C++ "sane" operator new.
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new))
CmdArgs.push_back("-fno-assume-sane-operator-new");
// -fconstant-cfstrings is default, and may be subject to argument translation
// on Darwin.
if (!Args.hasFlag(options::OPT_fconstant_cfstrings,
options::OPT_fno_constant_cfstrings) ||
!Args.hasFlag(options::OPT_mconstant_cfstrings,
options::OPT_mno_constant_cfstrings))
CmdArgs.push_back("-fno-constant-cfstrings");
// -fshort-wchar default varies depending on platform; only
// pass if specified.
if (Arg *A = Args.getLastArg(options::OPT_fshort_wchar))
A->render(Args, CmdArgs);
// -fno-pascal-strings is default, only pass non-default.
if (Args.hasFlag(options::OPT_fpascal_strings,
options::OPT_fno_pascal_strings,
false))
CmdArgs.push_back("-fpascal-strings");
// Honor -fpack-struct= and -fpack-struct, if given. Note that
// -fno-pack-struct doesn't apply to -fpack-struct=.
if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) {
std::string PackStructStr = "-fpack-struct=";
PackStructStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(PackStructStr));
} else if (Args.hasFlag(options::OPT_fpack_struct,
options::OPT_fno_pack_struct, false)) {
CmdArgs.push_back("-fpack-struct=1");
}
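// Worked example (traced from the branches above):
//   -fpack-struct=2  -> "-fpack-struct=2"
//   -fpack-struct    -> "-fpack-struct=1"
//   -fno-pack-struct -> nothing, and it does not cancel -fpack-struct=N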
if (KernelOrKext || isNoCommonDefault(getToolChain().getTriple())) {
if (!Args.hasArg(options::OPT_fcommon))
CmdArgs.push_back("-fno-common");
Args.ClaimAllArgs(options::OPT_fno_common);
}
// -fcommon is default, only pass non-default.
else if (!Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common))
CmdArgs.push_back("-fno-common");
// -fsigned-bitfields is default, and clang doesn't yet support
// -funsigned-bitfields.
if (!Args.hasFlag(options::OPT_fsigned_bitfields,
options::OPT_funsigned_bitfields))
D.Diag(diag::warn_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);
// -ffor-scope is default, and clang doesn't support -fno-for-scope.
if (!Args.hasFlag(options::OPT_ffor_scope,
options::OPT_fno_for_scope))
D.Diag(diag::err_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);
// -fcaret-diagnostics is default.
if (!Args.hasFlag(options::OPT_fcaret_diagnostics,
options::OPT_fno_caret_diagnostics, true))
CmdArgs.push_back("-fno-caret-diagnostics");
// -fdiagnostics-fixit-info is default, only pass non-default.
if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info,
options::OPT_fno_diagnostics_fixit_info))
CmdArgs.push_back("-fno-diagnostics-fixit-info");
// Enable -fdiagnostics-show-option by default.
if (Args.hasFlag(options::OPT_fdiagnostics_show_option,
options::OPT_fno_diagnostics_show_option))
CmdArgs.push_back("-fdiagnostics-show-option");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) {
CmdArgs.push_back("-fdiagnostics-show-category");
CmdArgs.push_back(A->getValue());
}
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(
options::OPT_fdiagnostics_show_note_include_stack,
options::OPT_fno_diagnostics_show_note_include_stack)) {
if (A->getOption().matches(
options::OPT_fdiagnostics_show_note_include_stack))
CmdArgs.push_back("-fdiagnostics-show-note-include-stack");
else
CmdArgs.push_back("-fno-diagnostics-show-note-include-stack");
}
// Color diagnostics are the default, unless the terminal doesn't support
// them.
// Support both clang's -f[no-]color-diagnostics and gcc's
// -f[no-]diagnostics-colors[=never|always|auto].
enum { Colors_On, Colors_Off, Colors_Auto } ShowColors = Colors_Auto;
for (ArgList::const_iterator it = Args.begin(), ie = Args.end();
it != ie; ++it) {
const Option &O = (*it)->getOption();
if (!O.matches(options::OPT_fcolor_diagnostics) &&
!O.matches(options::OPT_fdiagnostics_color) &&
!O.matches(options::OPT_fno_color_diagnostics) &&
!O.matches(options::OPT_fno_diagnostics_color) &&
!O.matches(options::OPT_fdiagnostics_color_EQ))
continue;
(*it)->claim();
if (O.matches(options::OPT_fcolor_diagnostics) ||
O.matches(options::OPT_fdiagnostics_color)) {
ShowColors = Colors_On;
} else if (O.matches(options::OPT_fno_color_diagnostics) ||
O.matches(options::OPT_fno_diagnostics_color)) {
ShowColors = Colors_Off;
} else {
assert(O.matches(options::OPT_fdiagnostics_color_EQ));
StringRef value((*it)->getValue());
if (value == "always")
ShowColors = Colors_On;
else if (value == "never")
ShowColors = Colors_Off;
else if (value == "auto")
ShowColors = Colors_Auto;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
<< ("-fdiagnostics-color=" + value).str();
}
}
if (ShowColors == Colors_On ||
(ShowColors == Colors_Auto && llvm::sys::Process::StandardErrHasColors()))
CmdArgs.push_back("-fcolor-diagnostics");
if (Args.hasArg(options::OPT_fansi_escape_codes))
CmdArgs.push_back("-fansi-escape-codes");
if (!Args.hasFlag(options::OPT_fshow_source_location,
options::OPT_fno_show_source_location))
CmdArgs.push_back("-fno-show-source-location");
if (!Args.hasFlag(options::OPT_fshow_column,
options::OPT_fno_show_column,
true))
CmdArgs.push_back("-fno-show-column");
if (!Args.hasFlag(options::OPT_fspell_checking,
options::OPT_fno_spell_checking))
CmdArgs.push_back("-fno-spell-checking");
// -fno-asm-blocks is default.
if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks,
false))
CmdArgs.push_back("-fasm-blocks");
// Enable vectorization by default according to the optimization level
// selected. For optimization levels that want vectorization we use the alias
// option to simplify the hasFlag logic.
bool EnableVec = shouldEnableVectorizerAtOLevel(Args);
OptSpecifier VectorizeAliasOption = EnableVec ? options::OPT_O_Group :
options::OPT_fvectorize;
if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption,
options::OPT_fno_vectorize, EnableVec))
CmdArgs.push_back("-vectorize-loops");
// -fslp-vectorize is default.
if (Args.hasFlag(options::OPT_fslp_vectorize,
options::OPT_fno_slp_vectorize, true))
CmdArgs.push_back("-vectorize-slp");
// -fno-slp-vectorize-aggressive is default.
if (Args.hasFlag(options::OPT_fslp_vectorize_aggressive,
options::OPT_fno_slp_vectorize_aggressive, false))
CmdArgs.push_back("-vectorize-slp-aggressive");
if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ))
A->render(Args, CmdArgs);
// -fdollars-in-identifiers default varies depending on platform and
// language; only pass if specified.
if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers,
options::OPT_fno_dollars_in_identifiers)) {
if (A->getOption().matches(options::OPT_fdollars_in_identifiers))
CmdArgs.push_back("-fdollars-in-identifiers");
else
CmdArgs.push_back("-fno-dollars-in-identifiers");
}
// -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for
// practical purposes.
if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time,
options::OPT_fno_unit_at_a_time)) {
if (A->getOption().matches(options::OPT_fno_unit_at_a_time))
D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
}
if (Args.hasFlag(options::OPT_fapple_pragma_pack,
options::OPT_fno_apple_pragma_pack, false))
CmdArgs.push_back("-fapple-pragma-pack");
// le32-specific flags:
// -fno-math-builtin: clang should not convert math builtins to intrinsics
// by default.
if (getToolChain().getArch() == llvm::Triple::le32) {
CmdArgs.push_back("-fno-math-builtin");
}
// Default to -fno-builtin-str{cat,cpy} on Darwin for ARM.
//
// FIXME: This is disabled until clang -cc1 supports -fno-builtin-foo. PR4941.
#if 0
if (getToolChain().getTriple().isOSDarwin() &&
(getToolChain().getArch() == llvm::Triple::arm ||
getToolChain().getArch() == llvm::Triple::thumb)) {
if (!Args.hasArg(options::OPT_fbuiltin_strcat))
CmdArgs.push_back("-fno-builtin-strcat");
if (!Args.hasArg(options::OPT_fbuiltin_strcpy))
CmdArgs.push_back("-fno-builtin-strcpy");
}
#endif
// Only allow -traditional or -traditional-cpp in preprocessing modes.
if (Arg *A = Args.getLastArg(options::OPT_traditional,
options::OPT_traditional_cpp)) {
if (isa<PreprocessJobAction>(JA))
CmdArgs.push_back("-traditional-cpp");
else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
Args.AddLastArg(CmdArgs, options::OPT_dM);
Args.AddLastArg(CmdArgs, options::OPT_dD);
// Handle serialized diagnostics.
if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
CmdArgs.push_back("-serialize-diagnostic-file");
CmdArgs.push_back(Args.MakeArgString(A->getValue()));
}
if (Args.hasArg(options::OPT_fretain_comments_from_system_headers))
CmdArgs.push_back("-fretain-comments-from-system-headers");
// Forward -fcomment-block-commands to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands);
// Forward -fparse-all-comments to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments);
// Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option
// parser.
Args.AddAllArgValues(CmdArgs, options::OPT_Xclang);
for (arg_iterator it = Args.filtered_begin(options::OPT_mllvm),
ie = Args.filtered_end(); it != ie; ++it) {
(*it)->claim();
// We translate this by hand to the -cc1 argument, since the nightly tests
// use it and developers have been trained to spell it with -mllvm.
if (StringRef((*it)->getValue(0)) == "-disable-llvm-optzns")
CmdArgs.push_back("-disable-llvm-optzns");
else
(*it)->render(Args, CmdArgs);
}
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back("-x");
if (Args.hasArg(options::OPT_rewrite_objc))
CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX));
else
CmdArgs.push_back(types::getTypeName(II.getType()));
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs, options::OPT_undef);
const char *Exec = getToolChain().getDriver().getClangProgramPath();
// Optionally embed the -cc1 level arguments into the debug info, for build
// analysis.
if (getToolChain().UseDwarfDebugFlags()) {
ArgStringList OriginalArgs;
for (ArgList::const_iterator it = Args.begin(),
ie = Args.end(); it != ie; ++it)
(*it)->render(Args, OriginalArgs);
SmallString<256> Flags;
Flags += Exec;
for (unsigned i = 0, e = OriginalArgs.size(); i != e; ++i) {
Flags += " ";
Flags += OriginalArgs[i];
}
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(Args.MakeArgString(Flags.str()));
}
// Add the split debug info name to the command lines here so we
// can propagate it to the backend.
bool SplitDwarf = Args.hasArg(options::OPT_gsplit_dwarf) &&
getToolChain().getTriple().isOSLinux() &&
(isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA));
const char *SplitDwarfOut;
if (SplitDwarf) {
CmdArgs.push_back("-split-dwarf-file");
SplitDwarfOut = SplitDebugName(Args, Inputs);
CmdArgs.push_back(SplitDwarfOut);
}
// Finally add the compile command to the compilation.
if (Args.hasArg(options::OPT__SLASH_fallback)) {
tools::visualstudio::Compile CL(getToolChain());
Command *CLCommand = CL.GetCommand(C, JA, Output, Inputs, Args,
LinkingOutput);
C.addCommand(new FallbackCommand(JA, *this, Exec, CmdArgs, CLCommand));
} else {
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
// Handle the debug info splitting at object creation time if we're
// creating an object.
// TODO: Currently only works on linux with newer objcopy.
if (SplitDwarf && !isa<CompileJobAction>(JA))
SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDwarfOut);
if (Arg *A = Args.getLastArg(options::OPT_pg))
if (Args.hasArg(options::OPT_fomit_frame_pointer))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-fomit-frame-pointer" << A->getAsString(Args);
// Claim some arguments which clang supports automatically.
// -fpch-preprocess is used with gcc to add a special marker in the output to
// include the PCH file. Clang's PTH solution is completely transparent, so we
// do not need to deal with it at all.
Args.ClaimAllArgs(options::OPT_fpch_preprocess);
// Claim some arguments which clang doesn't support, but we don't
// care to warn the user about.
Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group);
Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group);
// Disable warnings for clang -E -emit-llvm foo.c
Args.ClaimAllArgs(options::OPT_emit_llvm);
}
/// Add options related to the Objective-C runtime/ABI.
///
/// Returns the Objective-C runtime that was selected.
ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
ArgStringList &cmdArgs,
RewriteKind rewriteKind) const {
// Look for the controlling runtime option.
Arg *runtimeArg = args.getLastArg(options::OPT_fnext_runtime,
options::OPT_fgnu_runtime,
options::OPT_fobjc_runtime_EQ);
// Just forward -fobjc-runtime= to the frontend. This supersedes
// options about fragility.
if (runtimeArg &&
runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) {
ObjCRuntime runtime;
StringRef value = runtimeArg->getValue();
if (runtime.tryParse(value)) {
getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime)
<< value;
}
runtimeArg->render(args, cmdArgs);
return runtime;
}
// Otherwise, we'll need the ABI "version". Version numbers are
// slightly confusing for historical reasons:
// 1 - Traditional "fragile" ABI
// 2 - Non-fragile ABI, version 1
// 3 - Non-fragile ABI, version 2
unsigned objcABIVersion = 1;
// If -fobjc-abi-version= is present, use that to set the version.
if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
objcABIVersion = 1;
else if (value == "2")
objcABIVersion = 2;
else if (value == "3")
objcABIVersion = 3;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
<< value;
} else {
// Otherwise, determine if we are using the non-fragile ABI.
bool nonFragileABIIsDefault =
(rewriteKind == RK_NonFragile ||
(rewriteKind == RK_None &&
getToolChain().IsObjCNonFragileABIDefault()));
if (args.hasFlag(options::OPT_fobjc_nonfragile_abi,
options::OPT_fno_objc_nonfragile_abi,
nonFragileABIIsDefault)) {
// Determine the non-fragile ABI version to use.
#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO
unsigned nonFragileABIVersion = 1;
#else
unsigned nonFragileABIVersion = 2;
#endif
if (Arg *abiArg = args.getLastArg(
options::OPT_fobjc_nonfragile_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
nonFragileABIVersion = 1;
else if (value == "2")
nonFragileABIVersion = 2;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
<< value;
}
objcABIVersion = 1 + nonFragileABIVersion;
} else {
objcABIVersion = 1;
}
}
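// Mapping sketch for the versions above (hypothetical flags, assuming the
// non-fragile ABI is in effect for the last two):
//   -fobjc-abi-version=1            -> objcABIVersion 1 (fragile)
//   -fobjc-nonfragile-abi-version=1 -> objcABIVersion 2
//   -fobjc-nonfragile-abi-version=2 -> objcABIVersion 3 (the usual default)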
// We don't actually care about the ABI version other than whether
// it's non-fragile.
bool isNonFragile = objcABIVersion != 1;
// If we have no runtime argument, ask the toolchain for its default runtime.
// However, the rewriter only really supports the Mac runtime, so assume that.
ObjCRuntime runtime;
if (!runtimeArg) {
switch (rewriteKind) {
case RK_None:
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
break;
case RK_Fragile:
runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple());
break;
case RK_NonFragile:
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
break;
}
// -fnext-runtime
} else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) {
// On Darwin, make this use the default behavior for the toolchain.
if (getToolChain().getTriple().isOSDarwin()) {
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
// Otherwise, build for a generic macosx port.
} else {
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
}
// -fgnu-runtime
} else {
assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime));
// Legacy behaviour is to target the gnustep runtime if we are in
// non-fragile mode or the GCC runtime in fragile mode.
if (isNonFragile)
runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1,6));
else
runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple());
}
cmdArgs.push_back(args.MakeArgString(
"-fobjc-runtime=" + runtime.getAsString()));
return runtime;
}
void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
unsigned RTOptionID = options::OPT__SLASH_MT;
if (Args.hasArg(options::OPT__SLASH_LDd))
// The /LDd option implies /MTd. The dependent lib part can be overridden,
// but defining _DEBUG is sticky.
RTOptionID = options::OPT__SLASH_MTd;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group))
RTOptionID = A->getOption().getID();
switch(RTOptionID) {
case options::OPT__SLASH_MD:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
CmdArgs.push_back("--dependent-lib=msvcrt");
break;
case options::OPT__SLASH_MDd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
CmdArgs.push_back("--dependent-lib=msvcrtd");
break;
case options::OPT__SLASH_MT:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("--dependent-lib=libcmt");
break;
case options::OPT__SLASH_MTd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("--dependent-lib=libcmtd");
break;
default:
llvm_unreachable("Unexpected option ID.");
}
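// e.g. (illustrative): "/MDd" expands to "-D_DEBUG" "-D_MT" "-D_DLL"
// "--dependent-lib=msvcrtd", while the default "/MT" gives "-D_MT"
// "--dependent-lib=libcmt".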
// This provides POSIX compatibility (maps 'open' to '_open'), which most
// users want. The /Za flag to cl.exe turns this off, but it's not
// implemented in clang.
CmdArgs.push_back("--dependent-lib=oldnames");
// FIXME: Make this default for the win32 triple.
CmdArgs.push_back("-cxx-abi");
CmdArgs.push_back("microsoft");
if (Arg *A = Args.getLastArg(options::OPT_show_includes))
A->render(Args, CmdArgs);
if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
if (Args.hasArg(options::OPT__SLASH_fallback))
CmdArgs.push_back("msvc-fallback");
else
CmdArgs.push_back("msvc");
}
}
void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
// Don't warn about "clang -w -c foo.s"
Args.ClaimAllArgs(options::OPT_w);
// and "clang -emit-llvm -c foo.s"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// Invoke ourselves in -cc1as mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1as");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
std::string TripleStr =
getToolChain().ComputeEffectiveClangTriple(Args, Input.getType());
CmdArgs.push_back(Args.MakeArgString(TripleStr));
// Set the output mode; we currently only expect to be used as a real
// assembler.
CmdArgs.push_back("-filetype");
CmdArgs.push_back("obj");
// Set the main file name, so that debug info works even with
// -save-temps or preprocessed assembly.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(Clang::getBaseInputName(Args, Inputs));
// Add the target cpu
const llvm::Triple &Triple = getToolChain().getTriple();
std::string CPU = getCPUName(Args, Triple);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
// Add the target features
const Driver &D = getToolChain().getDriver();
getTargetFeatures(D, Triple, Args, CmdArgs);
// Ignore explicit -force_cpusubtype_ALL option.
(void) Args.hasArg(options::OPT_force__cpusubtype__ALL);
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
// Forward -g and handle debug info related flags, assuming we are dealing
// with an actual assembly file.
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
Args.ClaimAllArgs(options::OPT_g_Group);
if (Arg *A = Args.getLastArg(options::OPT_g_Group))
if (!A->getOption().matches(options::OPT_g0))
CmdArgs.push_back("-g");
// Add the -fdebug-compilation-dir flag if needed.
addDebugCompDirArg(Args, CmdArgs);
// Set the AT_producer to the clang version when using the integrated
// assembler on assembly source files.
CmdArgs.push_back("-dwarf-debug-producer");
CmdArgs.push_back(Args.MakeArgString(getClangFullVersion()));
}
// Optionally embed the -cc1as level arguments into the debug info, for build
// analysis.
if (getToolChain().UseDwarfDebugFlags()) {
ArgStringList OriginalArgs;
for (ArgList::const_iterator it = Args.begin(),
ie = Args.end(); it != ie; ++it)
(*it)->render(Args, OriginalArgs);
SmallString<256> Flags;
const char *Exec = getToolChain().getDriver().getClangProgramPath();
Flags += Exec;
for (unsigned i = 0, e = OriginalArgs.size(); i != e; ++i) {
Flags += " ";
Flags += OriginalArgs[i];
}
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(Args.MakeArgString(Flags.str()));
}
// FIXME: Add -static support, once we have it.
CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
getToolChain().getDriver());
Args.AddAllArgs(CmdArgs, options::OPT_mllvm);
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec = getToolChain().getDriver().getClangProgramPath();
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
// Handle the debug info splitting at object creation time if we're
// creating an object.
// TODO: Currently only works on linux with newer objcopy.
if (Args.hasArg(options::OPT_gsplit_dwarf) &&
getToolChain().getTriple().isOSLinux())
SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
SplitDebugName(Args, Inputs));
}
void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
for (ArgList::const_iterator
it = Args.begin(), ie = Args.end(); it != ie; ++it) {
Arg *A = *it;
if (forwardToGCC(A->getOption())) {
// Don't forward any -g arguments to assembly steps.
if (isa<AssembleJobAction>(JA) &&
A->getOption().matches(options::OPT_g_Group))
continue;
// Don't forward any -W arguments to assembly and link steps.
if ((isa<AssembleJobAction>(JA) || isa<LinkJobAction>(JA)) &&
A->getOption().matches(options::OPT_W_Group))
continue;
// It is unfortunate that we have to claim here, as this means
// we will basically never report anything interesting for
// platforms using a generic gcc, even if we are just using gcc
// to get to the assembler.
A->claim();
A->render(Args, CmdArgs);
}
}
RenderExtraToolArgs(JA, CmdArgs);
// If using a driver driver, force the arch.
llvm::Triple::ArchType Arch = getToolChain().getArch();
if (getToolChain().getTriple().isOSDarwin()) {
CmdArgs.push_back("-arch");
// FIXME: Remove these special cases.
if (Arch == llvm::Triple::ppc)
CmdArgs.push_back("ppc");
else if (Arch == llvm::Triple::ppc64)
CmdArgs.push_back("ppc64");
else if (Arch == llvm::Triple::ppc64le)
CmdArgs.push_back("ppc64le");
else
CmdArgs.push_back(Args.MakeArgString(getToolChain().getArchName()));
}
// Try to force gcc to match the tool chain we want, if we recognize
// the arch.
//
// FIXME: The triple class should directly provide the information we want
// here.
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::ppc)
CmdArgs.push_back("-m32");
else if (Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::ppc64 ||
Arch == llvm::Triple::ppc64le)
CmdArgs.push_back("-m64");
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Unexpected output");
CmdArgs.push_back("-fsyntax-only");
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
// Only pass -x if gcc will understand it; otherwise hope gcc
// understands the suffix correctly. The main use case this would go
// wrong in is for linker inputs if they happened to have an odd
// suffix; really the only way to get this to happen is a command
// like '-x foobar a.c' which will treat a.c like a linker input.
//
// FIXME: For the linker case specifically, can we safely convert
// inputs into '-Wl,' options?
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
// Don't try to pass LLVM or AST inputs to a generic gcc.
if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
II.getType() == types::TY_LLVM_BC || II.getType() == types::TY_LTO_BC)
D.Diag(diag::err_drv_no_linker_llvm_support)
<< getToolChain().getTripleString();
else if (II.getType() == types::TY_AST)
D.Diag(diag::err_drv_no_ast_support)
<< getToolChain().getTripleString();
else if (II.getType() == types::TY_ModuleFile)
D.Diag(diag::err_drv_no_module_support)
<< getToolChain().getTripleString();
if (types::canTypeBeUserSpecified(II.getType())) {
CmdArgs.push_back("-x");
CmdArgs.push_back(types::getTypeName(II.getType()));
}
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else {
const Arg &A = II.getInputArg();
// Reverse translate some rewritten options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) {
CmdArgs.push_back("-lstdc++");
continue;
}
// Don't render as input, we need gcc to do the translations.
A.render(Args, CmdArgs);
}
}
const std::string customGCCName = D.getCCCGenericGCCName();
const char *GCCName;
if (!customGCCName.empty())
GCCName = customGCCName.c_str();
else if (D.CCCIsCXX()) {
GCCName = "g++";
} else
GCCName = "gcc";
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void gcc::Preprocess::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-E");
}
void gcc::Precompile::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
// The type is good enough.
}
void gcc::Compile::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
// If -flto, etc. are present then make sure not to force assembly output.
if (JA.getType() == types::TY_LLVM_IR || JA.getType() == types::TY_LTO_IR ||
JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC)
CmdArgs.push_back("-c");
else {
if (JA.getType() != types::TY_PP_Asm)
D.Diag(diag::err_drv_invalid_gcc_output_type)
<< getTypeName(JA.getType());
CmdArgs.push_back("-S");
}
}
void gcc::Assemble::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-c");
}
void gcc::Link::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
// The types are (hopefully) good enough.
}
// Hexagon tools start.
void hexagon::Assemble::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
}
void hexagon::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
std::string MarchString = "-march=";
MarchString += toolchains::Hexagon_TC::GetTargetCPU(Args);
CmdArgs.push_back(Args.MakeArgString(MarchString));
RenderExtraToolArgs(JA, CmdArgs);
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Unexpected output");
CmdArgs.push_back("-fsyntax-only");
}
std::string SmallDataThreshold = GetHexagonSmallDataThresholdValue(Args);
if (!SmallDataThreshold.empty())
CmdArgs.push_back(
Args.MakeArgString(std::string("-G") + SmallDataThreshold));
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
// Only pass -x if gcc will understand it; otherwise hope gcc
// understands the suffix correctly. The main use case this would go
// wrong in is for linker inputs if they happened to have an odd
// suffix; really the only way to get this to happen is a command
// like '-x foobar a.c' which will treat a.c like a linker input.
//
// FIXME: For the linker case specifically, can we safely convert
// inputs into '-Wl,' options?
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
// Don't try to pass LLVM or AST inputs to a generic gcc.
if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
II.getType() == types::TY_LLVM_BC || II.getType() == types::TY_LTO_BC)
D.Diag(clang::diag::err_drv_no_linker_llvm_support)
<< getToolChain().getTripleString();
else if (II.getType() == types::TY_AST)
D.Diag(clang::diag::err_drv_no_ast_support)
<< getToolChain().getTripleString();
else if (II.getType() == types::TY_ModuleFile)
D.Diag(diag::err_drv_no_module_support)
<< getToolChain().getTripleString();
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
// Don't render as input; we need gcc to do the translations.
// FIXME: Pranav: What is this?
II.getInputArg().render(Args, CmdArgs);
}
const char *GCCName = "hexagon-as";
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void hexagon::Link::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
// The types are (hopefully) good enough.
}
void hexagon::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::Hexagon_TC& ToolChain =
static_cast<const toolchains::Hexagon_TC&>(getToolChain());
const Driver &D = ToolChain.getDriver();
ArgStringList CmdArgs;
//----------------------------------------------------------------------------
//
//----------------------------------------------------------------------------
bool hasStaticArg = Args.hasArg(options::OPT_static);
bool buildingLib = Args.hasArg(options::OPT_shared);
bool buildPIE = Args.hasArg(options::OPT_pie);
bool incStdLib = !Args.hasArg(options::OPT_nostdlib);
bool incStartFiles = !Args.hasArg(options::OPT_nostartfiles);
bool incDefLibs = !Args.hasArg(options::OPT_nodefaultlibs);
bool useShared = buildingLib && !hasStaticArg;
//----------------------------------------------------------------------------
// Silence warnings for various options
//----------------------------------------------------------------------------
Args.ClaimAllArgs(options::OPT_g_Group);
Args.ClaimAllArgs(options::OPT_emit_llvm);
Args.ClaimAllArgs(options::OPT_w); // Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_static_libgcc);
//----------------------------------------------------------------------------
//
//----------------------------------------------------------------------------
for (std::vector<std::string>::const_iterator i = ToolChain.ExtraOpts.begin(),
e = ToolChain.ExtraOpts.end();
i != e; ++i)
CmdArgs.push_back(i->c_str());
std::string MarchString = toolchains::Hexagon_TC::GetTargetCPU(Args);
CmdArgs.push_back(Args.MakeArgString("-m" + MarchString));
if (buildingLib) {
CmdArgs.push_back("-shared");
CmdArgs.push_back("-call_shared"); // should be the default, but doing as
// hexagon-gcc does
}
if (hasStaticArg)
CmdArgs.push_back("-static");
if (buildPIE && !buildingLib)
CmdArgs.push_back("-pie");
std::string SmallDataThreshold = GetHexagonSmallDataThresholdValue(Args);
if (!SmallDataThreshold.empty()) {
CmdArgs.push_back(
Args.MakeArgString(std::string("-G") + SmallDataThreshold));
}
//----------------------------------------------------------------------------
//
//----------------------------------------------------------------------------
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
const std::string MarchSuffix = "/" + MarchString;
const std::string G0Suffix = "/G0";
const std::string MarchG0Suffix = MarchSuffix + G0Suffix;
const std::string RootDir = toolchains::Hexagon_TC::GetGnuDir(D.InstalledDir)
+ "/";
const std::string StartFilesDir = RootDir
+ "hexagon/lib"
+ (buildingLib
? MarchG0Suffix : MarchSuffix);
//----------------------------------------------------------------------------
// moslib
//----------------------------------------------------------------------------
std::vector<std::string> oslibs;
bool hasStandalone = false;
for (arg_iterator it = Args.filtered_begin(options::OPT_moslib_EQ),
ie = Args.filtered_end(); it != ie; ++it) {
(*it)->claim();
oslibs.push_back((*it)->getValue());
hasStandalone = hasStandalone || (oslibs.back() == "standalone");
}
if (oslibs.empty()) {
oslibs.push_back("standalone");
hasStandalone = true;
}
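// Example (hypothetical): "-moslib=g0 -moslib=standalone" yields
// oslibs == {"g0", "standalone"}; with no -moslib= at all, "standalone" is
// assumed.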
//----------------------------------------------------------------------------
// Start Files
//----------------------------------------------------------------------------
if (incStdLib && incStartFiles) {
if (!buildingLib) {
if (hasStandalone) {
CmdArgs.push_back(
Args.MakeArgString(StartFilesDir + "/crt0_standalone.o"));
}
CmdArgs.push_back(Args.MakeArgString(StartFilesDir + "/crt0.o"));
}
std::string initObj = useShared ? "/initS.o" : "/init.o";
CmdArgs.push_back(Args.MakeArgString(StartFilesDir + initObj));
}
//----------------------------------------------------------------------------
// Library Search Paths
//----------------------------------------------------------------------------
const ToolChain::path_list &LibPaths = ToolChain.getFilePaths();
for (ToolChain::path_list::const_iterator
i = LibPaths.begin(),
e = LibPaths.end();
i != e;
++i)
CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + *i));
//----------------------------------------------------------------------------
//
//----------------------------------------------------------------------------
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_s);
Args.AddAllArgs(CmdArgs, options::OPT_t);
Args.AddAllArgs(CmdArgs, options::OPT_u_Group);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
//----------------------------------------------------------------------------
// Libraries
//----------------------------------------------------------------------------
if (incStdLib && incDefLibs) {
if (D.CCCIsCXX()) {
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lm");
}
CmdArgs.push_back("--start-group");
if (!buildingLib) {
for(std::vector<std::string>::iterator i = oslibs.begin(),
e = oslibs.end(); i != e; ++i)
CmdArgs.push_back(Args.MakeArgString("-l" + *i));
CmdArgs.push_back("-lc");
}
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("--end-group");
}
//----------------------------------------------------------------------------
// End files
//----------------------------------------------------------------------------
if (incStdLib && incStartFiles) {
std::string finiObj = useShared ? "/finiS.o" : "/fini.o";
CmdArgs.push_back(Args.MakeArgString(StartFilesDir + finiObj));
}
std::string Linker = ToolChain.GetProgramPath("hexagon-ld");
C.addCommand(new Command(JA, *this, Args.MakeArgString(Linker), CmdArgs));
}
// Hexagon tools end.
llvm::Triple::ArchType darwin::getArchTypeForDarwinArchName(StringRef Str) {
// See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
// archs which Darwin doesn't use.
// The matching this routine does is fairly pointless, since it is neither the
// complete architecture list, nor a reasonable subset. The problem is that
// historically the driver driver accepts this and also ties its -march=
// handling to the architecture name, so we need to be careful before removing
// support for it.
// This code must be kept in sync with Clang's Darwin specific argument
// translation.
return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
.Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
.Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
.Case("ppc64", llvm::Triple::ppc64)
.Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
.Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
llvm::Triple::x86)
.Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
// This is derived from the driver driver.
.Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
.Cases("armv7", "armv7em", "armv7f", "armv7k", "armv7m", llvm::Triple::arm)
.Cases("armv7s", "xscale", llvm::Triple::arm)
.Case("r600", llvm::Triple::r600)
.Case("nvptx", llvm::Triple::nvptx)
.Case("nvptx64", llvm::Triple::nvptx64)
.Case("amdil", llvm::Triple::amdil)
.Case("spir", llvm::Triple::spir)
.Default(llvm::Triple::UnknownArch);
}
const char *Clang::getBaseInputName(const ArgList &Args,
const InputInfoList &Inputs) {
return Args.MakeArgString(
llvm::sys::path::filename(Inputs[0].getBaseInput()));
}
const char *Clang::getBaseInputStem(const ArgList &Args,
const InputInfoList &Inputs) {
const char *Str = getBaseInputName(Args, Inputs);
if (const char *End = strrchr(Str, '.'))
return Args.MakeArgString(std::string(Str, End));
return Str;
}
const char *Clang::getDependencyFileName(const ArgList &Args,
const InputInfoList &Inputs) {
// FIXME: Think about this more.
std::string Res;
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
std::string Str(OutputOpt->getValue());
Res = Str.substr(0, Str.rfind('.'));
} else {
Res = getBaseInputStem(Args, Inputs);
}
return Args.MakeArgString(Res + ".d");
}
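// Worked example (assumed paths): for "clang -MD -o build/foo.o src/foo.c",
// getDependencyFileName() strips the output's extension and yields
// "build/foo.d"; with no -o, the input stem ("foo") produces "foo.d".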
void darwin::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
// If -no_integrated_as is used, add -Q to the darwin assembler driver to make
// sure it runs its system assembler, not clang's integrated assembler.
- if (Args.hasArg(options::OPT_no_integrated_as))
- CmdArgs.push_back("-Q");
+ // Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as.
+ // FIXME: at run time, detect assembler capabilities or rely on version
+ // information forwarded via -target-assembler-version (future).
+ if (Args.hasArg(options::OPT_no_integrated_as)) {
+ const llvm::Triple& t(getToolChain().getTriple());
+ if (!(t.isMacOSX() && t.isMacOSXVersionLT(10, 7)))
+ CmdArgs.push_back("-Q");
+ }
// Forward -g, assuming we are dealing with an actual assembly file.
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
if (Args.hasArg(options::OPT_gstabs))
CmdArgs.push_back("--gstabs");
else if (Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-g");
}
// Derived from asm spec.
AddDarwinArch(Args, CmdArgs);
// Use -force_cpusubtype_ALL on x86 by default.
if (getToolChain().getArch() == llvm::Triple::x86 ||
getToolChain().getArch() == llvm::Triple::x86_64 ||
Args.hasArg(options::OPT_force__cpusubtype__ALL))
CmdArgs.push_back("-force_cpusubtype_ALL");
if (getToolChain().getArch() != llvm::Triple::x86_64 &&
(((Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext)) &&
(!getDarwinToolChain().isTargetIPhoneOS() ||
getDarwinToolChain().isIPhoneOSVersionLT(6, 0))) ||
Args.hasArg(options::OPT_static)))
CmdArgs.push_back("-static");
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
// asm_final spec is empty.
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void darwin::DarwinTool::anchor() {}
void darwin::DarwinTool::AddDarwinArch(const ArgList &Args,
ArgStringList &CmdArgs) const {
StringRef ArchName = getDarwinToolChain().getDarwinArchName(Args);
// Derived from darwin_arch spec.
CmdArgs.push_back("-arch");
CmdArgs.push_back(Args.MakeArgString(ArchName));
// FIXME: Is this needed anymore?
if (ArchName == "arm")
CmdArgs.push_back("-force_cpusubtype_ALL");
}
bool darwin::Link::NeedsTempPath(const InputInfoList &Inputs) const {
// We only need to generate a temp path for LTO if we aren't compiling object
// files. When compiling source files, we run 'dsymutil' after linking. We
// don't run 'dsymutil' when compiling object files.
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it)
if (it->getType() != types::TY_Object)
return true;
return false;
}
void darwin::Link::AddLinkArgs(Compilation &C,
const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfoList &Inputs) const {
const Driver &D = getToolChain().getDriver();
const toolchains::Darwin &DarwinTC = getDarwinToolChain();
unsigned Version[3] = { 0, 0, 0 };
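// Parse the requested linker version, e.g. -mlinker-version=137 yields
// Version = {137, 0, 0}.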
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
bool HadExtra;
if (!Driver::GetReleaseVersion(A->getValue(), Version[0],
Version[1], Version[2], HadExtra) ||
HadExtra)
D.Diag(diag::err_drv_invalid_version_number)
<< A->getAsString(Args);
}
// Newer linkers support -demangle, pass it if supported and not disabled by
// the user.
if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) {
// Don't pass -demangle to ld_classic.
//
// FIXME: This is a temporary workaround, ld should be handling this.
bool UsesLdClassic = (getToolChain().getArch() == llvm::Triple::x86 &&
Args.hasArg(options::OPT_static));
if (getToolChain().getArch() == llvm::Triple::x86) {
for (arg_iterator it = Args.filtered_begin(options::OPT_Xlinker,
options::OPT_Wl_COMMA),
ie = Args.filtered_end(); it != ie; ++it) {
const Arg *A = *it;
for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
if (StringRef(A->getValue(i)) == "-kext")
UsesLdClassic = true;
}
}
if (!UsesLdClassic)
CmdArgs.push_back("-demangle");
}
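// ld64 understands -export_dynamic starting with version 137, so translate
// -rdynamic only for new enough linkers.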
if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137)
CmdArgs.push_back("-export_dynamic");
// If we are using LTO, automatically create a temporary file path for the
// linker to use, so that its lifetime extends past a possible dsymutil step.
if (Version[0] >= 116 && D.IsUsingLTO(Args) && NeedsTempPath(Inputs)) {
const char *TmpPath = C.getArgs().MakeArgString(
D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
C.addTempFile(TmpPath);
CmdArgs.push_back("-object_path_lto");
CmdArgs.push_back(TmpPath);
}
// Derived from the "link" spec.
Args.AddAllArgs(CmdArgs, options::OPT_static);
if (!Args.hasArg(options::OPT_static))
CmdArgs.push_back("-dynamic");
if (Args.hasArg(options::OPT_fgnu_runtime)) {
// FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu
// here. How do we wish to handle such things?
}
if (!Args.hasArg(options::OPT_dynamiclib)) {
AddDarwinArch(Args, CmdArgs);
// FIXME: Why do this only on this path?
Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL);
Args.AddLastArg(CmdArgs, options::OPT_bundle);
Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader);
Args.AddAllArgs(CmdArgs, options::OPT_client__name);
Arg *A;
if ((A = Args.getLastArg(options::OPT_compatibility__version)) ||
(A = Args.getLastArg(options::OPT_current__version)) ||
(A = Args.getLastArg(options::OPT_install__name)))
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getAsString(Args) << "-dynamiclib";
Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace);
Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs);
Args.AddLastArg(CmdArgs, options::OPT_private__bundle);
} else {
CmdArgs.push_back("-dylib");
Arg *A;
if ((A = Args.getLastArg(options::OPT_bundle)) ||
(A = Args.getLastArg(options::OPT_bundle__loader)) ||
(A = Args.getLastArg(options::OPT_client__name)) ||
(A = Args.getLastArg(options::OPT_force__flat__namespace)) ||
(A = Args.getLastArg(options::OPT_keep__private__externs)) ||
(A = Args.getLastArg(options::OPT_private__bundle)))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-dynamiclib";
Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version,
"-dylib_compatibility_version");
Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version,
"-dylib_current_version");
AddDarwinArch(Args, CmdArgs);
Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name,
"-dylib_install_name");
}
Args.AddLastArg(CmdArgs, options::OPT_all__load);
Args.AddAllArgs(CmdArgs, options::OPT_allowable__client);
Args.AddLastArg(CmdArgs, options::OPT_bind__at__load);
if (DarwinTC.isTargetIPhoneOS())
Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal);
Args.AddLastArg(CmdArgs, options::OPT_dead__strip);
Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms);
Args.AddAllArgs(CmdArgs, options::OPT_dylib__file);
Args.AddLastArg(CmdArgs, options::OPT_dynamic);
Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list);
Args.AddLastArg(CmdArgs, options::OPT_flat__namespace);
Args.AddAllArgs(CmdArgs, options::OPT_force__load);
Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names);
Args.AddAllArgs(CmdArgs, options::OPT_image__base);
Args.AddAllArgs(CmdArgs, options::OPT_init);
// Add the deployment target.
VersionTuple TargetVersion = DarwinTC.getTargetVersion();
// If we had an explicit -mios-simulator-version-min argument, honor that,
// otherwise use the traditional deployment targets. We can't just check the
// is-sim attribute because existing code follows this path, and the linker
// may not handle the argument.
//
// FIXME: We may be able to remove this, once we can verify no one depends on
// it.
if (Args.hasArg(options::OPT_mios_simulator_version_min_EQ))
CmdArgs.push_back("-ios_simulator_version_min");
else if (DarwinTC.isTargetIPhoneOS())
CmdArgs.push_back("-iphoneos_version_min");
else
CmdArgs.push_back("-macosx_version_min");
CmdArgs.push_back(Args.MakeArgString(TargetVersion.getAsString()));
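// e.g. "-macosx_version_min 10.9" when targeting OS X 10.9.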
Args.AddLastArg(CmdArgs, options::OPT_nomultidefs);
Args.AddLastArg(CmdArgs, options::OPT_multi__module);
Args.AddLastArg(CmdArgs, options::OPT_single__module);
Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined);
Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused);
if (const Arg *A = Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
options::OPT_fno_pie,
options::OPT_fno_PIE)) {
if (A->getOption().matches(options::OPT_fpie) ||
A->getOption().matches(options::OPT_fPIE))
CmdArgs.push_back("-pie");
else
CmdArgs.push_back("-no_pie");
}
Args.AddLastArg(CmdArgs, options::OPT_prebind);
Args.AddLastArg(CmdArgs, options::OPT_noprebind);
Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding);
Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules);
Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs);
Args.AddAllArgs(CmdArgs, options::OPT_sectcreate);
Args.AddAllArgs(CmdArgs, options::OPT_sectorder);
Args.AddAllArgs(CmdArgs, options::OPT_seg1addr);
Args.AddAllArgs(CmdArgs, options::OPT_segprot);
Args.AddAllArgs(CmdArgs, options::OPT_segaddr);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr);
Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table);
Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename);
Args.AddAllArgs(CmdArgs, options::OPT_sub__library);
Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella);
// Give --sysroot= preference over the Apple-specific behavior of also using
// --isysroot as the syslibroot.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
CmdArgs.push_back("-syslibroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
} else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
CmdArgs.push_back("-syslibroot");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace);
Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints);
Args.AddAllArgs(CmdArgs, options::OPT_umbrella);
Args.AddAllArgs(CmdArgs, options::OPT_undefined);
Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list);
Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches);
Args.AddLastArg(CmdArgs, options::OPT_X_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_y);
Args.AddLastArg(CmdArgs, options::OPT_w);
Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__);
Args.AddLastArg(CmdArgs, options::OPT_seglinkedit);
Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit);
Args.AddAllArgs(CmdArgs, options::OPT_sectalign);
Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols);
Args.AddAllArgs(CmdArgs, options::OPT_segcreate);
Args.AddLastArg(CmdArgs, options::OPT_whyload);
Args.AddLastArg(CmdArgs, options::OPT_whatsloaded);
Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name);
Args.AddLastArg(CmdArgs, options::OPT_dylinker);
Args.AddLastArg(CmdArgs, options::OPT_Mach);
}
void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
assert(Output.getType() == types::TY_Image && "Invalid linker output type.");
// The logic here is derived from gcc's behavior, most of which
// comes from specs (starting with link_command). Consult gcc for
// more information.
ArgStringList CmdArgs;
// Hack(tm) to ignore linking errors when we are doing ARC migration.
if (Args.hasArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_migrate)) {
for (ArgList::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I)
(*I)->claim();
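// Replace the link step with a plain 'touch' of the output file so the
// build can proceed.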
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("touch"));
CmdArgs.push_back(Output.getFilename());
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
return;
}
// I'm not sure why this particular decomposition exists in gcc, but
// we follow suit for ease of comparison.
AddLinkArgs(C, Args, CmdArgs, Inputs);
Args.AddAllArgs(CmdArgs, options::OPT_d_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_s);
Args.AddAllArgs(CmdArgs, options::OPT_t);
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_u_Group);
Args.AddLastArg(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_r);
// Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading
// members of static archive libraries which implement Objective-C classes or
// categories.
if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX))
CmdArgs.push_back("-ObjC");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
// Derived from startfile spec.
if (Args.hasArg(options::OPT_dynamiclib)) {
// Derived from darwin_dylib1 spec.
if (getDarwinToolChain().isTargetIOSSimulator()) {
// The simulator doesn't have a versioned crt1 file.
CmdArgs.push_back("-ldylib1.o");
} else if (getDarwinToolChain().isTargetIPhoneOS()) {
if (getDarwinToolChain().isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-ldylib1.o");
} else {
if (getDarwinToolChain().isMacosxVersionLT(10, 5))
CmdArgs.push_back("-ldylib1.o");
else if (getDarwinToolChain().isMacosxVersionLT(10, 6))
CmdArgs.push_back("-ldylib1.10.5.o");
}
} else {
if (Args.hasArg(options::OPT_bundle)) {
if (!Args.hasArg(options::OPT_static)) {
// Derived from darwin_bundle1 spec.
if (getDarwinToolChain().isTargetIOSSimulator()) {
// The simulator doesn't have a versioned crt1 file.
CmdArgs.push_back("-lbundle1.o");
} else if (getDarwinToolChain().isTargetIPhoneOS()) {
if (getDarwinToolChain().isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-lbundle1.o");
} else {
if (getDarwinToolChain().isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lbundle1.o");
}
}
} else {
if (Args.hasArg(options::OPT_pg) &&
getToolChain().SupportsProfiling()) {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_object) ||
Args.hasArg(options::OPT_preload)) {
CmdArgs.push_back("-lgcrt0.o");
} else {
CmdArgs.push_back("-lgcrt1.o");
// darwin_crt2 spec is empty.
}
// By default on OS X 10.8 and later, we don't link with a crt1.o
// file and the linker knows to use _main as the entry point. But,
// when compiling with -pg, we need to link with the gcrt1.o file,
// so pass the -no_new_main option to tell the linker to use the
// "start" symbol as the entry point.
if (getDarwinToolChain().isTargetMacOS() &&
!getDarwinToolChain().isMacosxVersionLT(10, 8))
CmdArgs.push_back("-no_new_main");
} else {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_object) ||
Args.hasArg(options::OPT_preload)) {
CmdArgs.push_back("-lcrt0.o");
} else {
// Derived from darwin_crt1 spec.
if (getDarwinToolChain().isTargetIOSSimulator()) {
// The simulator doesn't have a versioned crt1 file.
CmdArgs.push_back("-lcrt1.o");
} else if (getDarwinToolChain().isTargetIPhoneOS()) {
if (getDarwinToolChain().isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-lcrt1.o");
else if (getDarwinToolChain().isIPhoneOSVersionLT(6, 0))
CmdArgs.push_back("-lcrt1.3.1.o");
} else {
if (getDarwinToolChain().isMacosxVersionLT(10, 5))
CmdArgs.push_back("-lcrt1.o");
else if (getDarwinToolChain().isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lcrt1.10.5.o");
else if (getDarwinToolChain().isMacosxVersionLT(10, 8))
CmdArgs.push_back("-lcrt1.10.6.o");
// darwin_crt2 spec is empty.
}
}
}
}
}
if (!getDarwinToolChain().isTargetIPhoneOS() &&
Args.hasArg(options::OPT_shared_libgcc) &&
getDarwinToolChain().isMacosxVersionLT(10, 5)) {
const char *Str =
Args.MakeArgString(getToolChain().GetFilePath("crt3.o"));
CmdArgs.push_back(Str);
}
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
if (Args.hasArg(options::OPT_fopenmp))
// This is more complicated in gcc...
CmdArgs.push_back("-lgomp");
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
if (isObjCRuntimeLinked(Args) &&
!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
// Avoid linking compatibility stubs on i386 mac.
if (!getDarwinToolChain().isTargetMacOS() ||
getDarwinToolChain().getArch() != llvm::Triple::x86) {
// If we don't have ARC or subscripting runtime support, link in the
// runtime stubs. We have to do this *before* adding any of the normal
// linker inputs so that its initializer gets run first.
ObjCRuntime runtime =
getDarwinToolChain().getDefaultObjCRuntime(/*nonfragile*/ true);
// We use arclite library for both ARC and subscripting support.
if ((!runtime.hasNativeARC() && isObjCAutoRefCount(Args)) ||
!runtime.hasSubscripting())
getDarwinToolChain().AddLinkARCArgs(Args, CmdArgs);
}
CmdArgs.push_back("-framework");
CmdArgs.push_back("Foundation");
// Link libobjc.
CmdArgs.push_back("-lobjc");
}
if (LinkingOutput) {
CmdArgs.push_back("-arch_multiple");
CmdArgs.push_back("-final_output");
CmdArgs.push_back(LinkingOutput);
}
if (Args.hasArg(options::OPT_fnested_functions))
CmdArgs.push_back("-allow_stack_execute");
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (getToolChain().getDriver().CCCIsCXX())
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
// link_ssp spec is empty.
// Let the tool chain choose which runtime library to link.
getDarwinToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs);
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
// endfile_spec is empty.
}
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_F);
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-create");
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-output");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
assert(II.isFilename() && "Unexpected lipo input.");
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("lipo"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
const InputInfo &Input = Inputs[0];
assert(Input.isFilename() && "Unexpected dsymutil input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("--verify");
CmdArgs.push_back("--debug-info");
CmdArgs.push_back("--eh-frame");
CmdArgs.push_back("--quiet");
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
const InputInfo &Input = Inputs[0];
assert(Input.isFilename() && "Unexpected verify input");
// Grab the output of the earlier dsymutil run.
CmdArgs.push_back(Input.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void solaris::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void solaris::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
// FIXME: Find a real GCC, don't hard-code versions here
std::string GCCLibPath = "/usr/gcc/4.5/lib/gcc/";
const llvm::Triple &T = getToolChain().getTriple();
std::string LibPath = "/usr/lib/";
llvm::Triple::ArchType Arch = T.getArch();
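// Select the GCC 4.5.2 multilib directory that matches the target
// architecture.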
switch (Arch) {
case llvm::Triple::x86:
GCCLibPath +=
("i386-" + T.getVendorName() + "-" + T.getOSName()).str() + "/4.5.2/";
break;
case llvm::Triple::x86_64:
GCCLibPath += ("i386-" + T.getVendorName() + "-" + T.getOSName()).str();
GCCLibPath += "/4.5.2/amd64/";
LibPath += "amd64/";
break;
default:
llvm_unreachable("Unsupported architecture");
}
ArgStringList CmdArgs;
// Demangle C++ names in errors
CmdArgs.push_back("-C");
if ((!Args.hasArg(options::OPT_nostdlib)) &&
(!Args.hasArg(options::OPT_shared))) {
CmdArgs.push_back("-e");
CmdArgs.push_back("_start");
}
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
CmdArgs.push_back("-dn");
} else {
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
} else {
CmdArgs.push_back("--dynamic-linker");
CmdArgs.push_back(Args.MakeArgString(LibPath + "ld.so.1"));
}
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back(Args.MakeArgString(LibPath + "crt1.o"));
CmdArgs.push_back(Args.MakeArgString(LibPath + "crti.o"));
CmdArgs.push_back(Args.MakeArgString(LibPath + "values-Xa.o"));
CmdArgs.push_back(Args.MakeArgString(GCCLibPath + "crtbegin.o"));
} else {
CmdArgs.push_back(Args.MakeArgString(LibPath + "crti.o"));
CmdArgs.push_back(Args.MakeArgString(LibPath + "values-Xa.o"));
CmdArgs.push_back(Args.MakeArgString(GCCLibPath + "crtbegin.o"));
}
if (getToolChain().getDriver().CCCIsCXX())
CmdArgs.push_back(Args.MakeArgString(LibPath + "cxa_finalize.o"));
}
CmdArgs.push_back(Args.MakeArgString("-L" + GCCLibPath));
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_r);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (getToolChain().getDriver().CCCIsCXX())
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lgcc_s");
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("-lc");
CmdArgs.push_back("-lm");
}
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
CmdArgs.push_back(Args.MakeArgString(GCCLibPath + "crtend.o"));
}
CmdArgs.push_back(Args.MakeArgString(LibPath + "crtn.o"));
addProfileRT(getToolChain(), Args, CmdArgs, getToolChain().getTriple());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void auroraux::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("gas"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void auroraux::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
if ((!Args.hasArg(options::OPT_nostdlib)) &&
(!Args.hasArg(options::OPT_shared))) {
CmdArgs.push_back("-e");
CmdArgs.push_back("_start");
}
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
CmdArgs.push_back("-dn");
} else {
// CmdArgs.push_back("--eh-frame-hdr");
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
} else {
CmdArgs.push_back("--dynamic-linker");
CmdArgs.push_back("/lib/ld.so.1"); // 64Bit Path /lib/amd64/ld.so.1
}
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crt1.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crti.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbegin.o")));
} else {
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crti.o")));
}
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtn.o")));
}
CmdArgs.push_back(Args.MakeArgString("-L/opt/gcc4/lib/gcc/"
+ getToolChain().getTripleString()
+ "/4.2.4"));
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
// FIXME: For some reason GCC passes -lgcc before adding
// the default system libraries. Just mimic this for now.
CmdArgs.push_back("-lgcc");
if (Args.hasArg(options::OPT_pthread))
CmdArgs.push_back("-pthread");
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-lc");
CmdArgs.push_back("-lgcc");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtend.o")));
}
addProfileRT(getToolChain(), Args, CmdArgs, getToolChain().getTriple());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void openbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
// When building 32-bit code on OpenBSD/amd64, we have to explicitly
// instruct as(1) in the base system to assemble 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86)
CmdArgs.push_back("--32");
else if (getToolChain().getArch() == llvm::Triple::ppc) {
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-many");
} else if (getToolChain().getArch() == llvm::Triple::mips64 ||
getToolChain().getArch() == llvm::Triple::mips64el) {
StringRef CPUName;
StringRef ABIName;
getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
CmdArgs.push_back("-mabi");
CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data());
if (getToolChain().getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else
CmdArgs.push_back("-EL");
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (LastPICArg &&
(LastPICArg->getOption().matches(options::OPT_fPIC) ||
LastPICArg->getOption().matches(options::OPT_fpic) ||
LastPICArg->getOption().matches(options::OPT_fPIE) ||
LastPICArg->getOption().matches(options::OPT_fpie))) {
CmdArgs.push_back("-KPIC");
}
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (getToolChain().getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else if (getToolChain().getArch() == llvm::Triple::mips64el)
CmdArgs.push_back("-EL");
if ((!Args.hasArg(options::OPT_nostdlib)) &&
(!Args.hasArg(options::OPT_shared))) {
CmdArgs.push_back("-e");
CmdArgs.push_back("__start");
}
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
CmdArgs.push_back("--eh-frame-hdr");
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
} else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/usr/libexec/ld.so");
}
}
if (Args.hasArg(options::OPT_nopie))
CmdArgs.push_back("-nopie");
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("gcrt0.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crt0.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbegin.o")));
} else {
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbeginS.o")));
}
}
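// OpenBSD spells 64-bit x86 "amd64", so rewrite the triple prefix before
// forming the gcc-lib search path.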
std::string Triple = getToolChain().getTripleString();
if (Triple.substr(0, 6) == "x86_64")
Triple.replace(0, 6, "amd64");
CmdArgs.push_back(Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple +
"/4.2.1"));
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_s);
Args.AddAllArgs(CmdArgs, options::OPT_t);
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (D.CCCIsCXX()) {
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lm_p");
else
CmdArgs.push_back("-lm");
}
// FIXME: For some reason GCC passes -lgcc before adding
// the default system libraries. Just mimic this for now.
CmdArgs.push_back("-lgcc");
if (Args.hasArg(options::OPT_pthread)) {
if (!Args.hasArg(options::OPT_shared) &&
Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lpthread_p");
else
CmdArgs.push_back("-lpthread");
}
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lc_p");
else
CmdArgs.push_back("-lc");
}
CmdArgs.push_back("-lgcc");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtend.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtendS.o")));
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void bitrig::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void bitrig::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
if ((!Args.hasArg(options::OPT_nostdlib)) &&
(!Args.hasArg(options::OPT_shared))) {
CmdArgs.push_back("-e");
CmdArgs.push_back("__start");
}
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
CmdArgs.push_back("--eh-frame-hdr");
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
} else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/usr/libexec/ld.so");
}
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("gcrt0.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crt0.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbegin.o")));
} else {
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbeginS.o")));
}
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (D.CCCIsCXX()) {
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lm_p");
else
CmdArgs.push_back("-lm");
}
if (Args.hasArg(options::OPT_pthread)) {
if (!Args.hasArg(options::OPT_shared) &&
Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lpthread_p");
else
CmdArgs.push_back("-lpthread");
}
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lc_p");
else
CmdArgs.push_back("-lc");
}
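// Link the compiler-rt builtins library, which is named by architecture
// here (e.g. -lclang_rt.amd64 on x86_64).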
StringRef MyArch;
switch (getToolChain().getTriple().getArch()) {
case llvm::Triple::arm:
MyArch = "arm";
break;
case llvm::Triple::x86:
MyArch = "i386";
break;
case llvm::Triple::x86_64:
MyArch = "amd64";
break;
default:
llvm_unreachable("Unsupported architecture");
}
CmdArgs.push_back(Args.MakeArgString("-lclang_rt." + MyArch));
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtend.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtendS.o")));
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void freebsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
// When building 32-bit code on FreeBSD/amd64, we have to explicitly
// instruct as(1) in the base system to assemble 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86)
CmdArgs.push_back("--32");
else if (getToolChain().getArch() == llvm::Triple::ppc)
CmdArgs.push_back("-a32");
else if (getToolChain().getArch() == llvm::Triple::mips ||
getToolChain().getArch() == llvm::Triple::mipsel ||
getToolChain().getArch() == llvm::Triple::mips64 ||
getToolChain().getArch() == llvm::Triple::mips64el) {
StringRef CPUName;
StringRef ABIName;
getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data());
if (getToolChain().getArch() == llvm::Triple::mips ||
getToolChain().getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else
CmdArgs.push_back("-EL");
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (LastPICArg &&
(LastPICArg->getOption().matches(options::OPT_fPIC) ||
LastPICArg->getOption().matches(options::OPT_fpic) ||
LastPICArg->getOption().matches(options::OPT_fPIE) ||
LastPICArg->getOption().matches(options::OPT_fpie))) {
CmdArgs.push_back("-KPIC");
}
} else if (getToolChain().getArch() == llvm::Triple::arm ||
getToolChain().getArch() == llvm::Triple::thumb) {
const Driver &D = getToolChain().getDriver();
llvm::Triple Triple = getToolChain().getTriple();
StringRef FloatABI = getARMFloatABI(D, Args, Triple);
if (FloatABI == "hard") {
CmdArgs.push_back("-mfpu=vfp");
} else {
CmdArgs.push_back("-mfpu=softvfp");
}
switch(getToolChain().getTriple().getEnvironment()) {
case llvm::Triple::GNUEABIHF:
case llvm::Triple::GNUEABI:
case llvm::Triple::EABI:
CmdArgs.push_back("-meabi=5");
break;
default:
CmdArgs.push_back("-matpcs");
}
} else if (getToolChain().getArch() == llvm::Triple::sparc ||
getToolChain().getArch() == llvm::Triple::sparcv9) {
if (getToolChain().getArch() == llvm::Triple::sparc)
CmdArgs.push_back("-Av8plusa");
else
CmdArgs.push_back("-Av9a");
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (LastPICArg &&
(LastPICArg->getOption().matches(options::OPT_fPIC) ||
LastPICArg->getOption().matches(options::OPT_fpic) ||
LastPICArg->getOption().matches(options::OPT_fPIE) ||
LastPICArg->getOption().matches(options::OPT_fpie))) {
CmdArgs.push_back("-KPIC");
}
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void freebsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::FreeBSD& ToolChain =
static_cast<const toolchains::FreeBSD&>(getToolChain());
const Driver &D = ToolChain.getDriver();
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
if (Args.hasArg(options::OPT_pie))
CmdArgs.push_back("-pie");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-Bshareable");
} else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/libexec/ld-elf.so.1");
}
if (ToolChain.getTriple().getOSMajorVersion() >= 9) {
llvm::Triple::ArchType Arch = ToolChain.getArch();
if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc ||
Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
CmdArgs.push_back("--hash-style=both");
}
}
CmdArgs.push_back("--enable-new-dtags");
}
// When building 32-bit code on FreeBSD/amd64, we have to explicitly
// instruct ld in the base system to link 32-bit code.
if (ToolChain.getArch() == llvm::Triple::x86) {
CmdArgs.push_back("-m");
CmdArgs.push_back("elf_i386_fbsd");
}
if (ToolChain.getArch() == llvm::Triple::ppc) {
CmdArgs.push_back("-m");
CmdArgs.push_back("elf32ppc_fbsd");
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
const char *crt1 = NULL;
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
crt1 = "gcrt1.o";
else if (Args.hasArg(options::OPT_pie))
crt1 = "Scrt1.o";
else
crt1 = "crt1.o";
}
if (crt1)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
const char *crtbegin = NULL;
if (Args.hasArg(options::OPT_static))
crtbegin = "crtbeginT.o";
else if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
crtbegin = "crtbeginS.o";
else
crtbegin = "crtbegin.o";
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
const ToolChain::path_list Paths = ToolChain.getFilePaths();
for (ToolChain::path_list::const_iterator i = Paths.begin(), e = Paths.end();
i != e; ++i)
CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + *i));
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_s);
Args.AddAllArgs(CmdArgs, options::OPT_t);
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
// Tell the linker to load the plugin. This has to come before AddLinkerInputs
// as gold requires -plugin to come before any -plugin-opt that -Wl might
// forward.
if (D.IsUsingLTO(Args)) {
CmdArgs.push_back("-plugin");
std::string Plugin = ToolChain.getDriver().Dir + "/../lib/LLVMgold.so";
CmdArgs.push_back(Args.MakeArgString(Plugin));
// Try to pass driver level flags relevant to LTO code generation down to
// the plugin.
// Handle flags for selecting CPU variants.
std::string CPU = getCPUName(Args, ToolChain.getTriple());
if (!CPU.empty()) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=mcpu=") +
CPU));
}
}
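// A typical LTO link thus gains, e.g.:
//   -plugin <driverdir>/../lib/LLVMgold.so -plugin-opt=mcpu=<cpu>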
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (D.CCCIsCXX()) {
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lm_p");
else
CmdArgs.push_back("-lm");
}
// FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding
// the default system libraries. Just mimic this for now.
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lgcc_p");
else
CmdArgs.push_back("-lgcc");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-lgcc_eh");
} else if (Args.hasArg(options::OPT_pg)) {
CmdArgs.push_back("-lgcc_eh_p");
} else {
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_s");
CmdArgs.push_back("--no-as-needed");
}
if (Args.hasArg(options::OPT_pthread)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lpthread_p");
else
CmdArgs.push_back("-lpthread");
}
if (Args.hasArg(options::OPT_pg)) {
if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-lc");
else
CmdArgs.push_back("-lc_p");
CmdArgs.push_back("-lgcc_p");
} else {
CmdArgs.push_back("-lc");
CmdArgs.push_back("-lgcc");
}
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-lgcc_eh");
} else if (Args.hasArg(options::OPT_pg)) {
CmdArgs.push_back("-lgcc_eh_p");
} else {
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_s");
CmdArgs.push_back("--no-as-needed");
}
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
else
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
}
addProfileRT(ToolChain, Args, CmdArgs, ToolChain.getTriple());
const char *Exec =
Args.MakeArgString(ToolChain.GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void netbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
// When building 32-bit code on NetBSD/amd64, we have to explicitly
// instruct as(1) in the base system to assemble 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86)
CmdArgs.push_back("--32");
// Pass the target CPU to GNU as for ARM, since the source code might
// not have the correct .cpu annotation.
if (getToolChain().getArch() == llvm::Triple::arm) {
std::string MArch(getARMTargetCPU(Args, getToolChain().getTriple()));
CmdArgs.push_back(Args.MakeArgString("-mcpu=" + MArch));
}
if (getToolChain().getArch() == llvm::Triple::mips ||
getToolChain().getArch() == llvm::Triple::mipsel ||
getToolChain().getArch() == llvm::Triple::mips64 ||
getToolChain().getArch() == llvm::Triple::mips64el) {
StringRef CPUName;
StringRef ABIName;
getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data());
if (getToolChain().getArch() == llvm::Triple::mips ||
getToolChain().getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else
CmdArgs.push_back("-EL");
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (LastPICArg &&
(LastPICArg->getOption().matches(options::OPT_fPIC) ||
LastPICArg->getOption().matches(options::OPT_fpic) ||
LastPICArg->getOption().matches(options::OPT_fPIE) ||
LastPICArg->getOption().matches(options::OPT_fpie))) {
CmdArgs.push_back("-KPIC");
}
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as")));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-Bshareable");
} else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/libexec/ld.elf_so");
}
}
// When building 32-bit code on NetBSD/amd64, we have to explicitly
// instruct ld in the base system to link 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86) {
CmdArgs.push_back("-m");
CmdArgs.push_back("elf_i386");
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crt0.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crti.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbegin.o")));
} else {
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crti.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbeginS.o")));
}
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_s);
Args.AddAllArgs(CmdArgs, options::OPT_t);
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
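// Decide whether libgcc is still needed: NetBSD 6.99.23 and later (or an
// unversioned triple, Major == 0) no longer require it on x86 and x86_64.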
unsigned Major, Minor, Micro;
getToolChain().getTriple().getOSVersion(Major, Minor, Micro);
bool useLibgcc = true;
if (Major >= 7 || (Major == 6 && Minor == 99 && Micro >= 23) || Major == 0) {
if (getToolChain().getArch() == llvm::Triple::x86 ||
getToolChain().getArch() == llvm::Triple::x86_64)
useLibgcc = false;
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (D.CCCIsCXX()) {
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lm");
}
if (Args.hasArg(options::OPT_pthread))
CmdArgs.push_back("-lpthread");
CmdArgs.push_back("-lc");
if (useLibgcc) {
if (Args.hasArg(options::OPT_static)) {
// libgcc_eh depends on libc, so resolve as much as possible,
// pull in any new requirements from libc and then get the rest
// of libgcc.
CmdArgs.push_back("-lgcc_eh");
CmdArgs.push_back("-lc");
CmdArgs.push_back("-lgcc");
} else {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_s");
CmdArgs.push_back("--no-as-needed");
}
}
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(
"crtend.o")));
else
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(
"crtendS.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(
"crtn.o")));
}
addProfileRT(getToolChain(), Args, CmdArgs, getToolChain().getTriple());
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
bool NeedsKPIC = false;
// Add --32/--64 to make sure we get the format we want.
// This list of target flags is incomplete.
if (getToolChain().getArch() == llvm::Triple::x86) {
CmdArgs.push_back("--32");
} else if (getToolChain().getArch() == llvm::Triple::x86_64) {
CmdArgs.push_back("--64");
} else if (getToolChain().getArch() == llvm::Triple::ppc) {
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-many");
} else if (getToolChain().getArch() == llvm::Triple::ppc64) {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-many");
} else if (getToolChain().getArch() == llvm::Triple::ppc64le) {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64le");
CmdArgs.push_back("-many");
} else if (getToolChain().getArch() == llvm::Triple::sparc) {
CmdArgs.push_back("-32");
CmdArgs.push_back("-Av8plusa");
NeedsKPIC = true;
} else if (getToolChain().getArch() == llvm::Triple::sparcv9) {
CmdArgs.push_back("-64");
CmdArgs.push_back("-Av9a");
NeedsKPIC = true;
} else if (getToolChain().getArch() == llvm::Triple::arm) {
StringRef MArch = getToolChain().getArchName();
if (MArch == "armv7" || MArch == "armv7a" || MArch == "armv7-a")
CmdArgs.push_back("-mfpu=neon");
if (MArch == "armv8" || MArch == "armv8a" || MArch == "armv8-a")
CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8");
StringRef ARMFloatABI = getARMFloatABI(getToolChain().getDriver(), Args,
getToolChain().getTriple());
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=" + ARMFloatABI));
Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
} else if (getToolChain().getArch() == llvm::Triple::mips ||
getToolChain().getArch() == llvm::Triple::mipsel ||
getToolChain().getArch() == llvm::Triple::mips64 ||
getToolChain().getArch() == llvm::Triple::mips64el) {
StringRef CPUName;
StringRef ABIName;
getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data());
if (getToolChain().getArch() == llvm::Triple::mips ||
getToolChain().getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else
CmdArgs.push_back("-EL");
if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
if (StringRef(A->getValue()) == "2008")
CmdArgs.push_back(Args.MakeArgString("-mnan=2008"));
}
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfp64)) {
if (A->getOption().matches(options::OPT_mfp32))
CmdArgs.push_back(Args.MakeArgString("-mfp32"));
else
CmdArgs.push_back(Args.MakeArgString("-mfp64"));
}
Args.AddLastArg(CmdArgs, options::OPT_mips16, options::OPT_mno_mips16);
Args.AddLastArg(CmdArgs, options::OPT_mmicromips,
options::OPT_mno_micromips);
Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp);
Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2);
if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) {
// Do not use AddLastArg because not all versions of the MIPS assembler
// support the -mmsa / -mno-msa options.
if (A->getOption().matches(options::OPT_mmsa))
CmdArgs.push_back(Args.MakeArgString("-mmsa"));
}
NeedsKPIC = true;
} else if (getToolChain().getArch() == llvm::Triple::systemz) {
// Always pass an -march option, since our default of z10 is later
// than the GNU assembler's default.
StringRef CPUName = getSystemZTargetCPU(Args);
CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName));
}
if (NeedsKPIC) {
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (LastPICArg &&
(LastPICArg->getOption().matches(options::OPT_fPIC) ||
LastPICArg->getOption().matches(options::OPT_fpic) ||
LastPICArg->getOption().matches(options::OPT_fPIE) ||
LastPICArg->getOption().matches(options::OPT_fpie))) {
CmdArgs.push_back("-KPIC");
}
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
// Handle the debug info splitting at object creation time if we're
// creating an object.
// TODO: Currently this only works on Linux with a newer objcopy.
if (Args.hasArg(options::OPT_gsplit_dwarf) &&
getToolChain().getTriple().isOSLinux())
SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
SplitDebugName(Args, Inputs));
}
static void AddLibgcc(llvm::Triple Triple, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
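// Mirror GCC's libgcc spec: prefer the shared libgcc_s unless -static or
// -static-libgcc is in effect; Android always links libgcc statically.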
bool isAndroid = Triple.getEnvironment() == llvm::Triple::Android;
bool StaticLibgcc = Args.hasArg(options::OPT_static_libgcc) ||
Args.hasArg(options::OPT_static);
if (!D.CCCIsCXX())
CmdArgs.push_back("-lgcc");
if (StaticLibgcc || isAndroid) {
if (D.CCCIsCXX())
CmdArgs.push_back("-lgcc");
} else {
if (!D.CCCIsCXX())
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_s");
if (!D.CCCIsCXX())
CmdArgs.push_back("--no-as-needed");
}
if (StaticLibgcc && !isAndroid)
CmdArgs.push_back("-lgcc_eh");
else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX())
CmdArgs.push_back("-lgcc");
// According to the Android ABI, we have to link with libdl if we are
// linking with a non-static libgcc.
//
// NOTE: This fixes a link error on Android MIPS as well. The non-static
// libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl.
if (isAndroid && !StaticLibgcc)
CmdArgs.push_back("-ldl");
}
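// Returns true if the last -mabi= option selects the MIPS n32 ABI.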
static bool hasMipsN32ABIArg(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
return A && (A->getValue() == StringRef("n32"));
}
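// Select the dynamic linker path for a Linux target: Android uses
// /system/bin/linker, and each glibc architecture has its own ld.so name
// (e.g. hard-float ARM gets /lib/ld-linux-armhf.so.3; anything not listed
// falls through to the x86-64 /lib64/ld-linux-x86-64.so.2).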
static StringRef getLinuxDynamicLinker(const ArgList &Args,
const toolchains::Linux &ToolChain) {
if (ToolChain.getTriple().getEnvironment() == llvm::Triple::Android)
return "/system/bin/linker";
else if (ToolChain.getArch() == llvm::Triple::x86 ||
ToolChain.getArch() == llvm::Triple::sparc)
return "/lib/ld-linux.so.2";
else if (ToolChain.getArch() == llvm::Triple::aarch64)
return "/lib/ld-linux-aarch64.so.1";
else if (ToolChain.getArch() == llvm::Triple::arm ||
ToolChain.getArch() == llvm::Triple::thumb) {
if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
return "/lib/ld-linux-armhf.so.3";
else
return "/lib/ld-linux.so.3";
} else if (ToolChain.getArch() == llvm::Triple::mips ||
ToolChain.getArch() == llvm::Triple::mipsel)
return "/lib/ld.so.1";
else if (ToolChain.getArch() == llvm::Triple::mips64 ||
ToolChain.getArch() == llvm::Triple::mips64el) {
if (hasMipsN32ABIArg(Args))
return "/lib32/ld.so.1";
else
return "/lib64/ld.so.1";
} else if (ToolChain.getArch() == llvm::Triple::ppc)
return "/lib/ld.so.1";
else if (ToolChain.getArch() == llvm::Triple::ppc64 ||
ToolChain.getArch() == llvm::Triple::ppc64le ||
ToolChain.getArch() == llvm::Triple::systemz)
return "/lib64/ld64.so.1";
else if (ToolChain.getArch() == llvm::Triple::sparcv9)
return "/lib64/ld-linux.so.2";
else
return "/lib64/ld-linux-x86-64.so.2";
}
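// Construct the GNU ld invocation for Linux: linker emulation (-m), the
// dynamic linker, startup files (crt1/crti/crtbegin variants), library
// search paths, the gold LTO plugin if needed, sanitizer and profile
// runtimes, the standard C/C++ libraries plus libgcc, and the crtend/crtn
// epilogue.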
void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::Linux& ToolChain =
static_cast<const toolchains::Linux&>(getToolChain());
const Driver &D = ToolChain.getDriver();
const bool isAndroid =
ToolChain.getTriple().getEnvironment() == llvm::Triple::Android;
const SanitizerArgs &Sanitize = ToolChain.getSanitizerArgs();
const bool IsPIE =
!Args.hasArg(options::OPT_shared) &&
(Args.hasArg(options::OPT_pie) || Sanitize.hasZeroBaseShadow());
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
if (IsPIE)
CmdArgs.push_back("-pie");
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (Args.hasArg(options::OPT_s))
CmdArgs.push_back("-s");
for (std::vector<std::string>::const_iterator i = ToolChain.ExtraOpts.begin(),
e = ToolChain.ExtraOpts.end();
i != e; ++i)
CmdArgs.push_back(i->c_str());
if (!Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("--eh-frame-hdr");
}
CmdArgs.push_back("-m");
if (ToolChain.getArch() == llvm::Triple::x86)
CmdArgs.push_back("elf_i386");
else if (ToolChain.getArch() == llvm::Triple::aarch64)
CmdArgs.push_back("aarch64linux");
else if (ToolChain.getArch() == llvm::Triple::arm
|| ToolChain.getArch() == llvm::Triple::thumb)
CmdArgs.push_back("armelf_linux_eabi");
else if (ToolChain.getArch() == llvm::Triple::ppc)
CmdArgs.push_back("elf32ppclinux");
else if (ToolChain.getArch() == llvm::Triple::ppc64)
CmdArgs.push_back("elf64ppc");
else if (ToolChain.getArch() == llvm::Triple::sparc)
CmdArgs.push_back("elf32_sparc");
else if (ToolChain.getArch() == llvm::Triple::sparcv9)
CmdArgs.push_back("elf64_sparc");
else if (ToolChain.getArch() == llvm::Triple::mips)
CmdArgs.push_back("elf32btsmip");
else if (ToolChain.getArch() == llvm::Triple::mipsel)
CmdArgs.push_back("elf32ltsmip");
else if (ToolChain.getArch() == llvm::Triple::mips64) {
if (hasMipsN32ABIArg(Args))
CmdArgs.push_back("elf32btsmipn32");
else
CmdArgs.push_back("elf64btsmip");
}
else if (ToolChain.getArch() == llvm::Triple::mips64el) {
if (hasMipsN32ABIArg(Args))
CmdArgs.push_back("elf32ltsmipn32");
else
CmdArgs.push_back("elf64ltsmip");
}
else if (ToolChain.getArch() == llvm::Triple::systemz)
CmdArgs.push_back("elf64_s390");
else
CmdArgs.push_back("elf_x86_64");
if (Args.hasArg(options::OPT_static)) {
if (ToolChain.getArch() == llvm::Triple::arm
|| ToolChain.getArch() == llvm::Triple::thumb)
CmdArgs.push_back("-Bstatic");
else
CmdArgs.push_back("-static");
} else if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
if (isAndroid) {
CmdArgs.push_back("-Bsymbolic");
}
}
if (ToolChain.getArch() == llvm::Triple::arm ||
ToolChain.getArch() == llvm::Triple::thumb ||
(!Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_shared))) {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back(Args.MakeArgString(
D.DyldPrefix + getLinuxDynamicLinker(Args, ToolChain)));
}
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!isAndroid) {
const char *crt1 = NULL;
if (!Args.hasArg(options::OPT_shared)){
if (Args.hasArg(options::OPT_pg))
crt1 = "gcrt1.o";
else if (IsPIE)
crt1 = "Scrt1.o";
else
crt1 = "crt1.o";
}
if (crt1)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
}
const char *crtbegin;
if (Args.hasArg(options::OPT_static))
crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
else if (Args.hasArg(options::OPT_shared))
crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
else if (IsPIE)
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
else
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
// Add crtfastmath.o if available and fast math is enabled.
ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
const ToolChain::path_list Paths = ToolChain.getFilePaths();
for (ToolChain::path_list::const_iterator i = Paths.begin(), e = Paths.end();
i != e; ++i)
CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + *i));
// Tell the linker to load the plugin. This has to come before AddLinkerInputs
// as gold requires -plugin to come before any -plugin-opt that -Wl might
// forward.
if (D.IsUsingLTO(Args)) {
CmdArgs.push_back("-plugin");
std::string Plugin = ToolChain.getDriver().Dir + "/../lib/LLVMgold.so";
CmdArgs.push_back(Args.MakeArgString(Plugin));
// Try to pass driver level flags relevant to LTO code generation down to
// the plugin.
// Handle flags for selecting CPU variants.
std::string CPU = getCPUName(Args, ToolChain.getTriple());
if (!CPU.empty()) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=mcpu=") +
CPU));
}
}
if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
CmdArgs.push_back("--no-demangle");
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
// Call these before we add the C++ ABI library.
if (Sanitize.needsUbsanRt())
addUbsanRTLinux(getToolChain(), Args, CmdArgs, D.CCCIsCXX(),
Sanitize.needsAsanRt() || Sanitize.needsTsanRt() ||
Sanitize.needsMsanRt() || Sanitize.needsLsanRt());
if (Sanitize.needsAsanRt())
addAsanRTLinux(getToolChain(), Args, CmdArgs);
if (Sanitize.needsTsanRt())
addTsanRTLinux(getToolChain(), Args, CmdArgs);
if (Sanitize.needsMsanRt())
addMsanRTLinux(getToolChain(), Args, CmdArgs);
if (Sanitize.needsLsanRt())
addLsanRTLinux(getToolChain(), Args, CmdArgs);
if (Sanitize.needsDfsanRt())
addDfsanRTLinux(getToolChain(), Args, CmdArgs);
// The profile runtime also needs access to system libraries.
addProfileRTLinux(getToolChain(), Args, CmdArgs);
if (D.CCCIsCXX() &&
!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bstatic");
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bdynamic");
CmdArgs.push_back("-lm");
}
if (!Args.hasArg(options::OPT_nostdlib)) {
if (!Args.hasArg(options::OPT_nodefaultlibs)) {
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("--start-group");
bool OpenMP = Args.hasArg(options::OPT_fopenmp);
if (OpenMP) {
CmdArgs.push_back("-lgomp");
// FIXME: Exclude this for platforms with a libgomp that doesn't require
// librt. Most modern Linux platforms require it, but some may not.
CmdArgs.push_back("-lrt");
}
AddLibgcc(ToolChain.getTriple(), D, CmdArgs, Args);
if (Args.hasArg(options::OPT_pthread) ||
Args.hasArg(options::OPT_pthreads) || OpenMP)
CmdArgs.push_back("-lpthread");
CmdArgs.push_back("-lc");
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("--end-group");
else
AddLibgcc(ToolChain.getTriple(), D, CmdArgs, Args);
}
if (!Args.hasArg(options::OPT_nostartfiles)) {
const char *crtend;
if (Args.hasArg(options::OPT_shared))
crtend = isAndroid ? "crtend_so.o" : "crtendS.o";
else if (IsPIE)
crtend = isAndroid ? "crtend_android.o" : "crtendS.o";
else
crtend = isAndroid ? "crtend_android.o" : "crtend.o";
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
if (!isAndroid)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
}
}
C.addCommand(new Command(JA, *this, ToolChain.Linker.c_str(), CmdArgs));
}
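// Minix hands assembly straight to the base-system as.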
void minix::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
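// Minix links with the base-system ld, adding the usual crt files plus
// -lCompilerRT-Generic from /usr/pkg/compiler-rt/lib.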
void minix::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
addProfileRT(getToolChain(), Args, CmdArgs, getToolChain().getTriple());
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
if (D.CCCIsCXX()) {
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lm");
}
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (Args.hasArg(options::OPT_pthread))
CmdArgs.push_back("-lpthread");
CmdArgs.push_back("-lc");
CmdArgs.push_back("-lCompilerRT-Generic");
CmdArgs.push_back("-L/usr/pkg/compiler-rt/lib");
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
}
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
/// DragonFly Tools
// For now, DragonFly Assemble does just about the same as for
// FreeBSD, but this may change soon.
void dragonfly::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
// When building 32-bit code on DragonFly/pc64, we have to explicitly
// instruct the base system's as to assemble 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86)
CmdArgs.push_back("--32");
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
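// DragonFly links with the base-system ld, preferring the GCC 4.7
// libraries when /usr/lib/gcc47 exists and falling back to
// /usr/lib/gcc44 otherwise.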
void dragonfly::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
bool UseGCC47 = false;
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
if (llvm::sys::fs::exists("/usr/lib/gcc47", UseGCC47))
UseGCC47 = false;
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-Bshareable");
else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/usr/libexec/ld-elf.so.2");
}
CmdArgs.push_back("--hash-style=both");
}
// When building 32-bit code on DragonFly/pc64, we have to explicitly
// instruct the base system's ld to link 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86) {
CmdArgs.push_back("-m");
CmdArgs.push_back("elf_i386");
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("gcrt1.o")));
else {
if (Args.hasArg(options::OPT_pie))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("Scrt1.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crt1.o")));
}
}
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crti.o")));
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbeginS.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtbegin.o")));
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs)) {
// FIXME: GCC passes on -lgcc, -lgcc_pic and a whole lot of
// rpaths
if (UseGCC47)
CmdArgs.push_back("-L/usr/lib/gcc47");
else
CmdArgs.push_back("-L/usr/lib/gcc44");
if (!Args.hasArg(options::OPT_static)) {
if (UseGCC47) {
CmdArgs.push_back("-rpath");
CmdArgs.push_back("/usr/lib/gcc47");
} else {
CmdArgs.push_back("-rpath");
CmdArgs.push_back("/usr/lib/gcc44");
}
}
if (D.CCCIsCXX()) {
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lm");
}
if (Args.hasArg(options::OPT_pthread))
CmdArgs.push_back("-lpthread");
if (!Args.hasArg(options::OPT_nolibc)) {
CmdArgs.push_back("-lc");
}
if (UseGCC47) {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_static_libgcc)) {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("-lgcc_eh");
} else {
if (Args.hasArg(options::OPT_shared_libgcc)) {
CmdArgs.push_back("-lgcc_pic");
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-lgcc");
} else {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_pic");
CmdArgs.push_back("--no-as-needed");
}
}
} else {
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-lgcc_pic");
} else {
CmdArgs.push_back("-lgcc");
}
}
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles)) {
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtendS.o")));
else
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtend.o")));
CmdArgs.push_back(Args.MakeArgString(
getToolChain().GetFilePath("crtn.o")));
}
addProfileRT(getToolChain(), Args, CmdArgs, getToolChain().getTriple());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("ld"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
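// Drive Microsoft link.exe: map the output to -out:, default to libcmt
// for hosted links, handle /LD and /LDd DLL builds (adding -dll and an
// -implib: name), and pull in the ASan runtime libraries when the
// sanitizer is enabled (32-bit only for now).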
void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
if (Output.isFilename()) {
CmdArgs.push_back(Args.MakeArgString(std::string("-out:") +
Output.getFilename()));
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nostartfiles) &&
!C.getDriver().IsCLMode()) {
CmdArgs.push_back("-defaultlib:libcmt");
}
CmdArgs.push_back("-nologo");
bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd);
if (DLL) {
CmdArgs.push_back(Args.MakeArgString("-dll"));
SmallString<128> ImplibName(Output.getFilename());
llvm::sys::path::replace_extension(ImplibName, "lib");
CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") +
ImplibName.str()));
}
if (getToolChain().getSanitizerArgs().needsAsanRt()) {
CmdArgs.push_back(Args.MakeArgString("-debug"));
CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
SmallString<128> LibSanitizer(getToolChain().getDriver().ResourceDir);
llvm::sys::path::append(LibSanitizer, "lib", "windows");
if (DLL) {
llvm::sys::path::append(LibSanitizer, "clang_rt.asan_dll_thunk-i386.lib");
} else {
llvm::sys::path::append(LibSanitizer, "clang_rt.asan-i386.lib");
}
// FIXME: Handle 64-bit.
CmdArgs.push_back(Args.MakeArgString(LibSanitizer));
}
Args.AddAllArgValues(CmdArgs, options::OPT_l);
Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link);
// Add filenames immediately.
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
if (it->isFilename())
CmdArgs.push_back(it->getFilename());
else
it->getInputArg().renderAsInput(Args, CmdArgs);
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("link.exe"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void visualstudio::Compile::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput));
}
// Try to find FallbackName on PATH that is not identical to ClangProgramPath.
// If one cannot be found, return FallbackName.
// We do this special search to prevent clang-cl from falling back onto itself
// if it's available as cl.exe on the path.
static std::string FindFallback(const char *FallbackName,
const char *ClangProgramPath) {
llvm::Optional<std::string> OptPath = llvm::sys::Process::GetEnv("PATH");
if (!OptPath.hasValue())
return FallbackName;
#ifdef LLVM_ON_WIN32
const StringRef PathSeparators = ";";
#else
const StringRef PathSeparators = ":";
#endif
SmallVector<StringRef, 8> PathSegments;
llvm::SplitString(OptPath.getValue(), PathSegments, PathSeparators);
for (size_t i = 0, e = PathSegments.size(); i != e; ++i) {
const StringRef &PathSegment = PathSegments[i];
if (PathSegment.empty())
continue;
SmallString<128> FilePath(PathSegment);
llvm::sys::path::append(FilePath, FallbackName);
if (llvm::sys::fs::can_execute(Twine(FilePath)) &&
!llvm::sys::fs::equivalent(Twine(FilePath), ClangProgramPath))
return FilePath.str();
}
return FallbackName;
}
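// Build the cl.exe fallback invocation: translate the clang-cl flags that
// have direct cl equivalents (/c, /W0, -D/-U/-I, the optimization level,
// /GR[-], /FI includes, and the /MD-/MT family) and run the cl.exe found
// by FindFallback above.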
Command *visualstudio::Compile::GetCommand(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("/nologo");
CmdArgs.push_back("/c"); // Compile only.
CmdArgs.push_back("/W0"); // No warnings.
// The goal is to be able to invoke this tool correctly based on
// any flag accepted by clang-cl.
// These are spelled the same way in clang and cl.exe.
Args.AddAllArgs(CmdArgs, options::OPT_D, options::OPT_U);
Args.AddAllArgs(CmdArgs, options::OPT_I);
// Optimization level.
if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) {
if (A->getOption().getID() == options::OPT_O0) {
CmdArgs.push_back("/Od");
} else {
StringRef OptLevel = A->getValue();
if (OptLevel == "1" || OptLevel == "2" || OptLevel == "s")
A->render(Args, CmdArgs);
else if (OptLevel == "3")
CmdArgs.push_back("/Ox");
}
}
// Flags for which clang-cl has an alias.
// FIXME: How can we ensure this stays in sync with relevant clang-cl options?
if (Arg *A = Args.getLastArg(options::OPT_frtti, options::OPT_fno_rtti))
CmdArgs.push_back(A->getOption().getID() == options::OPT_frtti ? "/GR"
: "/GR-");
if (Args.hasArg(options::OPT_fsyntax_only))
CmdArgs.push_back("/Zs");
std::vector<std::string> Includes = Args.getAllArgValues(options::OPT_include);
for (size_t I = 0, E = Includes.size(); I != E; ++I)
CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Includes[I]));
// Flags that can simply be passed through.
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd);
// The order of these flags is relevant, so pick the last one.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd,
options::OPT__SLASH_MT, options::OPT__SLASH_MTd))
A->render(Args, CmdArgs);
// Input filename.
assert(Inputs.size() == 1);
const InputInfo &II = Inputs[0];
assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX);
CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp");
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
// Output filename.
assert(Output.getType() == types::TY_Object);
const char *Fo = Args.MakeArgString(std::string("/Fo") +
Output.getFilename());
CmdArgs.push_back(Fo);
const Driver &D = getToolChain().getDriver();
std::string Exec = FindFallback("cl.exe", D.getClangProgramPath());
return new Command(JA, *this, Args.MakeArgString(Exec), CmdArgs);
}
/// XCore Tools
// We pass assemble and link construction to the xcc tool.
void XCore::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
CmdArgs.push_back("-c");
if (Args.hasArg(options::OPT_g_Group)) {
CmdArgs.push_back("-g");
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
options::OPT_Xassembler);
for (InputInfoList::const_iterator
it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
const InputInfo &II = *it;
CmdArgs.push_back(II.getFilename());
}
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("xcc"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
void XCore::Link::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("xcc"));
C.addCommand(new Command(JA, *this, Exec, CmdArgs));
}
Index: head/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp (revision 265924)
+++ head/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp (revision 265925)
@@ -1,6089 +1,6088 @@
//===--- SemaExprCXX.cpp - Semantic Analysis for Expressions --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements semantic analysis for C++ expressions.
///
//===----------------------------------------------------------------------===//
#include "clang/Sema/SemaInternal.h"
#include "TypeLocBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaLambda.h"
#include "clang/Sema/TemplateDeduction.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
using namespace sema;
/// \brief Handle the result of the special case name lookup for inheriting
/// constructor declarations. 'NS::X::X' and 'NS::X<...>::X' are treated as
/// constructor names in member using declarations, even if 'X' is not the
/// name of the corresponding type.
ParsedType Sema::getInheritingConstructorName(CXXScopeSpec &SS,
SourceLocation NameLoc,
IdentifierInfo &Name) {
NestedNameSpecifier *NNS = SS.getScopeRep();
// Convert the nested-name-specifier into a type.
QualType Type;
switch (NNS->getKind()) {
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
Type = QualType(NNS->getAsType(), 0);
break;
case NestedNameSpecifier::Identifier:
// Strip off the last layer of the nested-name-specifier and build a
// typename type for it.
assert(NNS->getAsIdentifier() == &Name && "not a constructor name");
Type = Context.getDependentNameType(ETK_None, NNS->getPrefix(),
NNS->getAsIdentifier());
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
llvm_unreachable("Nested name specifier is not a type for inheriting ctor");
}
// This reference to the type is located entirely at the location of the
// final identifier in the qualified-id.
return CreateParsedType(Type,
Context.getTrivialTypeSourceInfo(Type, NameLoc));
}
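/// Resolve a destructor name of the form ~identifier, using the scope
/// specifier and/or the object expression's type to locate the class
/// being destroyed.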
ParsedType Sema::getDestructorName(SourceLocation TildeLoc,
IdentifierInfo &II,
SourceLocation NameLoc,
Scope *S, CXXScopeSpec &SS,
ParsedType ObjectTypePtr,
bool EnteringContext) {
// Determine where to perform name lookup.
// FIXME: This area of the standard is very messy, and the current
// wording is rather unclear about which scopes we search for the
// destructor name; see core issues 399 and 555. Issue 399 in
// particular shows where the current description of destructor name
// lookup is completely out of line with existing practice, e.g.,
// this appears to be ill-formed:
//
// namespace N {
// template <typename T> struct S {
// ~S();
// };
// }
//
// void f(N::S<int>* s) {
// s->N::S<int>::~S();
// }
//
// See also PR6358 and PR6359.
// For this reason, we're currently only doing the C++03 version of this
// code; the C++0x version has to wait until we get a proper spec.
QualType SearchType;
DeclContext *LookupCtx = 0;
bool isDependent = false;
bool LookInScope = false;
// If we have an object type, it's because we are in a
// pseudo-destructor-expression or a member access expression, and
// we know what type we're looking for.
if (ObjectTypePtr)
SearchType = GetTypeFromParser(ObjectTypePtr);
if (SS.isSet()) {
NestedNameSpecifier *NNS = (NestedNameSpecifier *)SS.getScopeRep();
bool AlreadySearched = false;
bool LookAtPrefix = true;
// C++ [basic.lookup.qual]p6:
// If a pseudo-destructor-name (5.2.4) contains a nested-name-specifier,
// the type-names are looked up as types in the scope designated by the
// nested-name-specifier. In a qualified-id of the form:
//
// ::[opt] nested-name-specifier ~ class-name
//
// where the nested-name-specifier designates a namespace scope, and in
// a qualified-id of the form:
//
// ::opt nested-name-specifier class-name :: ~ class-name
//
// the class-names are looked up as types in the scope designated by
// the nested-name-specifier.
//
// Here, we check the first case (completely) and determine whether the
// code below is permitted to look at the prefix of the
// nested-name-specifier.
DeclContext *DC = computeDeclContext(SS, EnteringContext);
if (DC && DC->isFileContext()) {
AlreadySearched = true;
LookupCtx = DC;
isDependent = false;
} else if (DC && isa<CXXRecordDecl>(DC))
LookAtPrefix = false;
// The second case from the C++03 rules quoted further above.
NestedNameSpecifier *Prefix = 0;
if (AlreadySearched) {
// Nothing left to do.
} else if (LookAtPrefix && (Prefix = NNS->getPrefix())) {
CXXScopeSpec PrefixSS;
PrefixSS.Adopt(NestedNameSpecifierLoc(Prefix, SS.location_data()));
LookupCtx = computeDeclContext(PrefixSS, EnteringContext);
isDependent = isDependentScopeSpecifier(PrefixSS);
} else if (ObjectTypePtr) {
LookupCtx = computeDeclContext(SearchType);
isDependent = SearchType->isDependentType();
} else {
LookupCtx = computeDeclContext(SS, EnteringContext);
isDependent = LookupCtx && LookupCtx->isDependentContext();
}
LookInScope = false;
} else if (ObjectTypePtr) {
// C++ [basic.lookup.classref]p3:
// If the unqualified-id is ~type-name, the type-name is looked up
// in the context of the entire postfix-expression. If the type T
// of the object expression is of a class type C, the type-name is
// also looked up in the scope of class C. At least one of the
// lookups shall find a name that refers to (possibly
// cv-qualified) T.
LookupCtx = computeDeclContext(SearchType);
isDependent = SearchType->isDependentType();
assert((isDependent || !SearchType->isIncompleteType()) &&
"Caller should have completed object type");
LookInScope = true;
} else {
// Perform lookup into the current scope (only).
LookInScope = true;
}
TypeDecl *NonMatchingTypeDecl = 0;
LookupResult Found(*this, &II, NameLoc, LookupOrdinaryName);
for (unsigned Step = 0; Step != 2; ++Step) {
// Look for the name first in the computed lookup context (if we
// have one) and, if that fails to find a match, in the scope (if
// we're allowed to look there).
Found.clear();
if (Step == 0 && LookupCtx)
LookupQualifiedName(Found, LookupCtx);
else if (Step == 1 && LookInScope && S)
LookupName(Found, S);
else
continue;
// FIXME: Should we be suppressing ambiguities here?
if (Found.isAmbiguous())
return ParsedType();
if (TypeDecl *Type = Found.getAsSingle<TypeDecl>()) {
QualType T = Context.getTypeDeclType(Type);
if (SearchType.isNull() || SearchType->isDependentType() ||
Context.hasSameUnqualifiedType(T, SearchType)) {
// We found our type!
return ParsedType::make(T);
}
if (!SearchType.isNull())
NonMatchingTypeDecl = Type;
}
// If the name that we found is a class template name, and it is
// the same name as the template name in the last part of the
// nested-name-specifier (if present) or the object type, then
// this is the destructor for that class.
// FIXME: This is a workaround until we get real drafting for core
// issue 399, for which there isn't even an obvious direction.
if (ClassTemplateDecl *Template = Found.getAsSingle<ClassTemplateDecl>()) {
QualType MemberOfType;
if (SS.isSet()) {
if (DeclContext *Ctx = computeDeclContext(SS, EnteringContext)) {
// Figure out the type of the context, if it has one.
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(Ctx))
MemberOfType = Context.getTypeDeclType(Record);
}
}
if (MemberOfType.isNull())
MemberOfType = SearchType;
if (MemberOfType.isNull())
continue;
// We're referring into a class template specialization. If the
// class template we found is the same as the template being
// specialized, we found what we are looking for.
if (const RecordType *Record = MemberOfType->getAs<RecordType>()) {
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(Record->getDecl())) {
if (Spec->getSpecializedTemplate()->getCanonicalDecl() ==
Template->getCanonicalDecl())
return ParsedType::make(MemberOfType);
}
continue;
}
// We're referring to an unresolved class template
// specialization. Determine whether the class template we found
// is the same as the template being specialized or, if we don't
// know which template is being specialized, that it at least
// has the same name.
if (const TemplateSpecializationType *SpecType
= MemberOfType->getAs<TemplateSpecializationType>()) {
TemplateName SpecName = SpecType->getTemplateName();
// The class template we found is the same template being
// specialized.
if (TemplateDecl *SpecTemplate = SpecName.getAsTemplateDecl()) {
if (SpecTemplate->getCanonicalDecl() == Template->getCanonicalDecl())
return ParsedType::make(MemberOfType);
continue;
}
// The class template we found has the same name as the
// (dependent) template name being specialized.
if (DependentTemplateName *DepTemplate
= SpecName.getAsDependentTemplateName()) {
if (DepTemplate->isIdentifier() &&
DepTemplate->getIdentifier() == Template->getIdentifier())
return ParsedType::make(MemberOfType);
continue;
}
}
}
}
if (isDependent) {
// We didn't find our type, but that's okay: it's dependent
// anyway.
// FIXME: What if we have no nested-name-specifier?
QualType T = CheckTypenameType(ETK_None, SourceLocation(),
SS.getWithLocInContext(Context),
II, NameLoc);
return ParsedType::make(T);
}
if (NonMatchingTypeDecl) {
QualType T = Context.getTypeDeclType(NonMatchingTypeDecl);
Diag(NameLoc, diag::err_destructor_expr_type_mismatch)
<< T << SearchType;
Diag(NonMatchingTypeDecl->getLocation(), diag::note_destructor_type_here)
<< T;
} else if (ObjectTypePtr)
Diag(NameLoc, diag::err_ident_in_dtor_not_a_type)
<< &II;
else {
SemaDiagnosticBuilder DtorDiag = Diag(NameLoc,
diag::err_destructor_class_name);
if (S) {
const DeclContext *Ctx = S->getEntity();
if (const CXXRecordDecl *Class = dyn_cast_or_null<CXXRecordDecl>(Ctx))
DtorDiag << FixItHint::CreateReplacement(SourceRange(NameLoc),
Class->getNameAsString());
}
}
return ParsedType();
}
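/// Resolve a destructor name written with a decltype-specifier, as in
/// 'p->~decltype(*p)()'; the named type must match the object type
/// unless the object type is dependent.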
ParsedType Sema::getDestructorType(const DeclSpec& DS, ParsedType ObjectType) {
if (DS.getTypeSpecType() == DeclSpec::TST_error || !ObjectType)
return ParsedType();
assert(DS.getTypeSpecType() == DeclSpec::TST_decltype
&& "only get destructor types from declspecs");
QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
QualType SearchType = GetTypeFromParser(ObjectType);
if (SearchType->isDependentType() || Context.hasSameUnqualifiedType(SearchType, T)) {
return ParsedType::make(T);
}
Diag(DS.getTypeSpecTypeLoc(), diag::err_destructor_expr_type_mismatch)
<< T << SearchType;
return ParsedType();
}
/// \brief Build a C++ typeid expression with a type operand.
ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType,
SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
// C++ [expr.typeid]p4:
// The top-level cv-qualifiers of the lvalue expression or the type-id
// that is the operand of typeid are always ignored.
// If the type of the type-id is a class type or a reference to a class
// type, the class shall be completely-defined.
Qualifiers Quals;
QualType T
= Context.getUnqualifiedArrayType(Operand->getType().getNonReferenceType(),
Quals);
if (T->getAs<RecordType>() &&
RequireCompleteType(TypeidLoc, T, diag::err_incomplete_typeid))
return ExprError();
return Owned(new (Context) CXXTypeidExpr(TypeInfoType.withConst(),
Operand,
SourceRange(TypeidLoc, RParenLoc)));
}
/// \brief Build a C++ typeid expression with an expression operand.
ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType,
SourceLocation TypeidLoc,
Expr *E,
SourceLocation RParenLoc) {
if (E && !E->isTypeDependent()) {
if (E->getType()->isPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(E);
if (result.isInvalid()) return ExprError();
E = result.take();
}
QualType T = E->getType();
if (const RecordType *RecordT = T->getAs<RecordType>()) {
CXXRecordDecl *RecordD = cast<CXXRecordDecl>(RecordT->getDecl());
// C++ [expr.typeid]p3:
// [...] If the type of the expression is a class type, the class
// shall be completely-defined.
if (RequireCompleteType(TypeidLoc, T, diag::err_incomplete_typeid))
return ExprError();
// C++ [expr.typeid]p3:
// When typeid is applied to an expression other than a glvalue of a
// polymorphic class type [...] [the] expression is an unevaluated
// operand. [...]
if (RecordD->isPolymorphic() && E->isGLValue()) {
// The subexpression is potentially evaluated; switch the context
// and recheck the subexpression.
ExprResult Result = TransformToPotentiallyEvaluated(E);
if (Result.isInvalid()) return ExprError();
E = Result.take();
// We require a vtable to query the type at run time.
MarkVTableUsed(TypeidLoc, RecordD);
}
}
// C++ [expr.typeid]p4:
// [...] If the type of the type-id is a reference to a possibly
// cv-qualified type, the result of the typeid expression refers to a
// std::type_info object representing the cv-unqualified referenced
// type.
Qualifiers Quals;
QualType UnqualT = Context.getUnqualifiedArrayType(T, Quals);
if (!Context.hasSameType(T, UnqualT)) {
T = UnqualT;
E = ImpCastExprToType(E, UnqualT, CK_NoOp, E->getValueKind()).take();
}
}
return Owned(new (Context) CXXTypeidExpr(TypeInfoType.withConst(),
E,
SourceRange(TypeidLoc, RParenLoc)));
}
/// ActOnCXXTypeid - Parse typeid( type-id ) or typeid (expression);
ExprResult
Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc,
bool isType, void *TyOrExpr, SourceLocation RParenLoc) {
// Find the std::type_info type.
if (!getStdNamespace())
return ExprError(Diag(OpLoc, diag::err_need_header_before_typeid));
if (!CXXTypeInfoDecl) {
IdentifierInfo *TypeInfoII = &PP.getIdentifierTable().get("type_info");
LookupResult R(*this, TypeInfoII, SourceLocation(), LookupTagName);
LookupQualifiedName(R, getStdNamespace());
CXXTypeInfoDecl = R.getAsSingle<RecordDecl>();
// Microsoft's typeinfo doesn't have type_info in std but in the global
// namespace if _HAS_EXCEPTIONS is defined to 0. See PR13153.
if (!CXXTypeInfoDecl && LangOpts.MicrosoftMode) {
LookupQualifiedName(R, Context.getTranslationUnitDecl());
CXXTypeInfoDecl = R.getAsSingle<RecordDecl>();
}
if (!CXXTypeInfoDecl)
return ExprError(Diag(OpLoc, diag::err_need_header_before_typeid));
}
if (!getLangOpts().RTTI) {
return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti));
}
QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl);
if (isType) {
// The operand is a type; handle it as such.
TypeSourceInfo *TInfo = 0;
QualType T = GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrExpr),
&TInfo);
if (T.isNull())
return ExprError();
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(T, OpLoc);
return BuildCXXTypeId(TypeInfoType, OpLoc, TInfo, RParenLoc);
}
// The operand is an expression.
return BuildCXXTypeId(TypeInfoType, OpLoc, (Expr*)TyOrExpr, RParenLoc);
}
/// \brief Build a Microsoft __uuidof expression with a type operand.
ExprResult Sema::BuildCXXUuidof(QualType TypeInfoType,
SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
if (!Operand->getType()->isDependentType()) {
bool HasMultipleGUIDs = false;
if (!CXXUuidofExpr::GetUuidAttrOfType(Operand->getType(),
&HasMultipleGUIDs)) {
if (HasMultipleGUIDs)
return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
else
return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid));
}
}
return Owned(new (Context) CXXUuidofExpr(TypeInfoType.withConst(),
Operand,
SourceRange(TypeidLoc, RParenLoc)));
}
/// \brief Build a Microsoft __uuidof expression with an expression operand.
ExprResult Sema::BuildCXXUuidof(QualType TypeInfoType,
SourceLocation TypeidLoc,
Expr *E,
SourceLocation RParenLoc) {
if (!E->getType()->isDependentType()) {
bool HasMultipleGUIDs = false;
if (!CXXUuidofExpr::GetUuidAttrOfType(E->getType(), &HasMultipleGUIDs) &&
!E->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
if (HasMultipleGUIDs)
return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
else
return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid));
}
}
return Owned(new (Context) CXXUuidofExpr(TypeInfoType.withConst(),
E,
SourceRange(TypeidLoc, RParenLoc)));
}
/// ActOnCXXUuidof - Parse __uuidof( type-id ) or __uuidof (expression);
ExprResult
Sema::ActOnCXXUuidof(SourceLocation OpLoc, SourceLocation LParenLoc,
bool isType, void *TyOrExpr, SourceLocation RParenLoc) {
// If MSVCGuidDecl has not been cached, do the lookup.
if (!MSVCGuidDecl) {
IdentifierInfo *GuidII = &PP.getIdentifierTable().get("_GUID");
LookupResult R(*this, GuidII, SourceLocation(), LookupTagName);
LookupQualifiedName(R, Context.getTranslationUnitDecl());
MSVCGuidDecl = R.getAsSingle<RecordDecl>();
if (!MSVCGuidDecl)
return ExprError(Diag(OpLoc, diag::err_need_header_before_ms_uuidof));
}
QualType GuidType = Context.getTypeDeclType(MSVCGuidDecl);
if (isType) {
// The operand is a type; handle it as such.
TypeSourceInfo *TInfo = 0;
QualType T = GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrExpr),
&TInfo);
if (T.isNull())
return ExprError();
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(T, OpLoc);
return BuildCXXUuidof(GuidType, OpLoc, TInfo, RParenLoc);
}
// The operand is an expression.
return BuildCXXUuidof(GuidType, OpLoc, (Expr*)TyOrExpr, RParenLoc);
}
/// ActOnCXXBoolLiteral - Parse {true,false} literals.
ExprResult
Sema::ActOnCXXBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) {
assert((Kind == tok::kw_true || Kind == tok::kw_false) &&
"Unknown C++ Boolean value!");
return Owned(new (Context) CXXBoolLiteralExpr(Kind == tok::kw_true,
Context.BoolTy, OpLoc));
}
/// ActOnCXXNullPtrLiteral - Parse 'nullptr'.
ExprResult
Sema::ActOnCXXNullPtrLiteral(SourceLocation Loc) {
return Owned(new (Context) CXXNullPtrLiteralExpr(Context.NullPtrTy, Loc));
}
/// ActOnCXXThrow - Parse throw expressions.
ExprResult
Sema::ActOnCXXThrow(Scope *S, SourceLocation OpLoc, Expr *Ex) {
bool IsThrownVarInScope = false;
if (Ex) {
// C++0x [class.copymove]p31:
// When certain criteria are met, an implementation is allowed to omit the
// copy/move construction of a class object [...]
//
// - in a throw-expression, when the operand is the name of a
// non-volatile automatic object (other than a function or catch-
// clause parameter) whose scope does not extend beyond the end of the
// innermost enclosing try-block (if there is one), the copy/move
// operation from the operand to the exception object (15.1) can be
// omitted by constructing the automatic object directly into the
// exception object
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Ex->IgnoreParens()))
if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
if (Var->hasLocalStorage() && !Var->getType().isVolatileQualified()) {
for( ; S; S = S->getParent()) {
if (S->isDeclScope(Var)) {
IsThrownVarInScope = true;
break;
}
if (S->getFlags() &
(Scope::FnScope | Scope::ClassScope | Scope::BlockScope |
Scope::FunctionPrototypeScope | Scope::ObjCMethodScope |
Scope::TryScope))
break;
}
}
}
}
return BuildCXXThrow(OpLoc, Ex, IsThrownVarInScope);
}
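/// Build a CXXThrowExpr: diagnose 'throw' when exceptions are disabled
/// (outside system headers) and check the operand unless it is
/// type-dependent.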
ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex,
bool IsThrownVarInScope) {
// Don't report an error if 'throw' is used in system headers.
if (!getLangOpts().CXXExceptions &&
!getSourceManager().isInSystemHeader(OpLoc))
Diag(OpLoc, diag::err_exceptions_disabled) << "throw";
if (Ex && !Ex->isTypeDependent()) {
ExprResult ExRes = CheckCXXThrowOperand(OpLoc, Ex, IsThrownVarInScope);
if (ExRes.isInvalid())
return ExprError();
Ex = ExRes.take();
}
return Owned(new (Context) CXXThrowExpr(Ex, Context.VoidTy, OpLoc,
IsThrownVarInScope));
}
/// CheckCXXThrowOperand - Validate the operand of a throw.
ExprResult Sema::CheckCXXThrowOperand(SourceLocation ThrowLoc, Expr *E,
bool IsThrownVarInScope) {
// C++ [except.throw]p3:
// A throw-expression initializes a temporary object, called the exception
// object, the type of which is determined by removing any top-level
// cv-qualifiers from the static type of the operand of throw and adjusting
// the type from "array of T" or "function returning T" to "pointer to T"
// or "pointer to function returning T", [...]
if (E->getType().hasQualifiers())
E = ImpCastExprToType(E, E->getType().getUnqualifiedType(), CK_NoOp,
E->getValueKind()).take();
ExprResult Res = DefaultFunctionArrayConversion(E);
if (Res.isInvalid())
return ExprError();
E = Res.take();
// If the type of the exception would be an incomplete type or a pointer
// to an incomplete type other than (cv) void the program is ill-formed.
QualType Ty = E->getType();
bool isPointer = false;
if (const PointerType* Ptr = Ty->getAs<PointerType>()) {
Ty = Ptr->getPointeeType();
isPointer = true;
}
if (!isPointer || !Ty->isVoidType()) {
if (RequireCompleteType(ThrowLoc, Ty,
isPointer? diag::err_throw_incomplete_ptr
: diag::err_throw_incomplete,
E->getSourceRange()))
return ExprError();
if (RequireNonAbstractType(ThrowLoc, E->getType(),
diag::err_throw_abstract_type, E))
return ExprError();
}
// Initialize the exception result. This implicitly weeds out
// abstract types or types with inaccessible copy constructors.
// C++0x [class.copymove]p31:
// When certain criteria are met, an implementation is allowed to omit the
// copy/move construction of a class object [...]
//
// - in a throw-expression, when the operand is the name of a
// non-volatile automatic object (other than a function or catch-clause
// parameter) whose scope does not extend beyond the end of the
// innermost enclosing try-block (if there is one), the copy/move
// operation from the operand to the exception object (15.1) can be
// omitted by constructing the automatic object directly into the
// exception object
const VarDecl *NRVOVariable = 0;
if (IsThrownVarInScope)
NRVOVariable = getCopyElisionCandidate(QualType(), E, false);
InitializedEntity Entity =
InitializedEntity::InitializeException(ThrowLoc, E->getType(),
/*NRVO=*/NRVOVariable != 0);
Res = PerformMoveOrCopyInitialization(Entity, NRVOVariable,
QualType(), E,
IsThrownVarInScope);
if (Res.isInvalid())
return ExprError();
E = Res.take();
// If the exception has class type, we need additional handling.
const RecordType *RecordTy = Ty->getAs<RecordType>();
if (!RecordTy)
return Owned(E);
CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
// If we are throwing a polymorphic class type or pointer thereof,
// exception handling will make use of the vtable.
MarkVTableUsed(ThrowLoc, RD);
// If a pointer is thrown, the referenced object will not be destroyed.
if (isPointer)
return Owned(E);
// If the class has a destructor, we must be able to call it.
if (RD->hasIrrelevantDestructor())
return Owned(E);
CXXDestructorDecl *Destructor = LookupDestructor(RD);
if (!Destructor)
return Owned(E);
MarkFunctionReferenced(E->getExprLoc(), Destructor);
CheckDestructorAccess(E->getExprLoc(), Destructor,
PDiag(diag::err_access_dtor_exception) << Ty);
if (DiagnoseUseOfDecl(Destructor, E->getExprLoc()))
return ExprError();
return Owned(E);
}
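/// Compute the type of 'this' in the current context: the enclosing
/// instance method's 'this' type when we are inside one, otherwise the
/// override installed by CXXThisScopeRAII (null when 'this' is
/// unavailable).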
QualType Sema::getCurrentThisType() {
DeclContext *DC = getFunctionLevelDeclContext();
QualType ThisTy = CXXThisTypeOverride;
if (CXXMethodDecl *method = dyn_cast<CXXMethodDecl>(DC)) {
if (method && method->isInstance())
ThisTy = method->getThisType(Context);
}
return ThisTy;
}
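// RAII helper that temporarily overrides the type of 'this' while Sema
// processes a declaration inside ContextDecl (a class or class template),
// applying the given cv-qualifiers; the old override is restored by the
// destructor.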
Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema &S,
Decl *ContextDecl,
unsigned CXXThisTypeQuals,
bool Enabled)
: S(S), OldCXXThisTypeOverride(S.CXXThisTypeOverride), Enabled(false)
{
if (!Enabled || !ContextDecl)
return;
CXXRecordDecl *Record = 0;
if (ClassTemplateDecl *Template = dyn_cast<ClassTemplateDecl>(ContextDecl))
Record = Template->getTemplatedDecl();
else
Record = cast<CXXRecordDecl>(ContextDecl);
S.CXXThisTypeOverride
= S.Context.getPointerType(
S.Context.getRecordType(Record).withCVRQualifiers(CXXThisTypeQuals));
this->Enabled = true;
}
Sema::CXXThisScopeRAII::~CXXThisScopeRAII() {
if (Enabled) {
S.CXXThisTypeOverride = OldCXXThisTypeOverride;
}
}
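// Materialize a capture of 'this' for a lambda closure or captured
// region: add an implicit private field of the pointer type to RD and
// return the CXXThisExpr used to initialize it.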
static Expr *captureThis(ASTContext &Context, RecordDecl *RD,
QualType ThisTy, SourceLocation Loc) {
FieldDecl *Field
= FieldDecl::Create(Context, RD, Loc, Loc, 0, ThisTy,
Context.getTrivialTypeSourceInfo(ThisTy, Loc),
0, false, ICIS_NoInit);
Field->setImplicit(true);
Field->setAccess(AS_private);
RD->addDecl(Field);
return new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/true);
}
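/// Check, and when BuildAndDiagnose is set record, the capture of 'this'
/// by the enclosing lambdas, blocks, and captured regions, walking
/// outward up to FunctionScopeIndexToStopAt.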
bool Sema::CheckCXXThisCapture(SourceLocation Loc, bool Explicit,
bool BuildAndDiagnose, const unsigned *const FunctionScopeIndexToStopAt) {
// We don't need to capture this in an unevaluated context.
if (isUnevaluatedContext() && !Explicit)
return true;
const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ?
*FunctionScopeIndexToStopAt : FunctionScopes.size() - 1;
// Otherwise, check that we can capture 'this'.
unsigned NumClosures = 0;
for (unsigned idx = MaxFunctionScopesIndex; idx != 0; idx--) {
if (CapturingScopeInfo *CSI =
dyn_cast<CapturingScopeInfo>(FunctionScopes[idx])) {
if (CSI->CXXThisCaptureIndex != 0) {
// 'this' is already being captured; there isn't anything more to do.
break;
}
LambdaScopeInfo *LSI = dyn_cast<LambdaScopeInfo>(CSI);
if (LSI && isGenericLambdaCallOperatorSpecialization(LSI->CallOperator)) {
// This context can't implicitly capture 'this'; fail out.
if (BuildAndDiagnose)
Diag(Loc, diag::err_this_capture) << Explicit;
return true;
}
if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_LambdaByref ||
CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_LambdaByval ||
CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_Block ||
CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_CapturedRegion ||
Explicit) {
// This closure can capture 'this'; continue looking upwards.
NumClosures++;
Explicit = false;
continue;
}
// This context can't implicitly capture 'this'; fail out.
if (BuildAndDiagnose)
Diag(Loc, diag::err_this_capture) << Explicit;
return true;
}
break;
}
if (!BuildAndDiagnose) return false;
// Mark that we're implicitly capturing 'this' in all the scopes we skipped.
// FIXME: We need to delay this marking in PotentiallyPotentiallyEvaluated
// contexts.
for (unsigned idx = MaxFunctionScopesIndex; NumClosures;
--idx, --NumClosures) {
CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[idx]);
Expr *ThisExpr = 0;
QualType ThisTy = getCurrentThisType();
if (LambdaScopeInfo *LSI = dyn_cast<LambdaScopeInfo>(CSI))
// For lambda expressions, build a field and an initializing expression.
ThisExpr = captureThis(Context, LSI->Lambda, ThisTy, Loc);
else if (CapturedRegionScopeInfo *RSI
= dyn_cast<CapturedRegionScopeInfo>(FunctionScopes[idx]))
ThisExpr = captureThis(Context, RSI->TheRecordDecl, ThisTy, Loc);
bool isNested = NumClosures > 1;
CSI->addThisCapture(isNested, Loc, ThisTy, ThisExpr);
}
return false;
}
ExprResult Sema::ActOnCXXThis(SourceLocation Loc) {
/// C++ 9.3.2: In the body of a non-static member function, the keyword this
/// is a non-lvalue expression whose value is the address of the object for
/// which the function is called.
QualType ThisTy = getCurrentThisType();
if (ThisTy.isNull()) return Diag(Loc, diag::err_invalid_this_use);
CheckCXXThisCapture(Loc);
return Owned(new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit=*/false));
}
bool Sema::isThisOutsideMemberFunctionBody(QualType BaseType) {
// If we're outside the body of a member function, then we'll have a specified
// type for 'this'.
if (CXXThisTypeOverride.isNull())
return false;
// Determine whether we're looking into a class that's currently being
// defined.
CXXRecordDecl *Class = BaseType->getAsCXXRecordDecl();
return Class && Class->isBeingDefined();
}
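// Parser entry point for functional casts and value-initialization,
// e.g. T(x) or T(); recovers the type and forwards to
// BuildCXXTypeConstructExpr.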
ExprResult
Sema::ActOnCXXTypeConstructExpr(ParsedType TypeRep,
SourceLocation LParenLoc,
MultiExprArg exprs,
SourceLocation RParenLoc) {
if (!TypeRep)
return ExprError();
TypeSourceInfo *TInfo;
QualType Ty = GetTypeFromParser(TypeRep, &TInfo);
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(Ty, SourceLocation());
return BuildCXXTypeConstructExpr(TInfo, LParenLoc, exprs, RParenLoc);
}
/// ActOnCXXTypeConstructExpr - Parse construction of a specified type.
/// Can be interpreted either as function-style casting ("int(x)")
/// or class type construction ("ClassType(x,y,z)")
/// or creation of a value-initialized type ("int()").
ExprResult
Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
SourceLocation LParenLoc,
MultiExprArg Exprs,
SourceLocation RParenLoc) {
QualType Ty = TInfo->getType();
SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc();
if (Ty->isDependentType() || CallExpr::hasAnyTypeDependentArguments(Exprs)) {
return Owned(CXXUnresolvedConstructExpr::Create(Context, TInfo,
LParenLoc,
Exprs,
RParenLoc));
}
bool ListInitialization = LParenLoc.isInvalid();
assert((!ListInitialization || (Exprs.size() == 1 && isa<InitListExpr>(Exprs[0])))
&& "List initialization must have initializer list as expression.");
SourceRange FullRange = SourceRange(TyBeginLoc,
ListInitialization ? Exprs[0]->getSourceRange().getEnd() : RParenLoc);
// C++ [expr.type.conv]p1:
// If the expression list is a single expression, the type conversion
// expression is equivalent (in definedness, and if defined in meaning) to the
// corresponding cast expression.
if (Exprs.size() == 1 && !ListInitialization) {
Expr *Arg = Exprs[0];
return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc);
}
QualType ElemTy = Ty;
if (Ty->isArrayType()) {
if (!ListInitialization)
return ExprError(Diag(TyBeginLoc,
diag::err_value_init_for_array_type) << FullRange);
ElemTy = Context.getBaseElementType(Ty);
}
if (!Ty->isVoidType() &&
RequireCompleteType(TyBeginLoc, ElemTy,
diag::err_invalid_incomplete_type_use, FullRange))
return ExprError();
if (RequireNonAbstractType(TyBeginLoc, Ty,
diag::err_allocation_of_abstract_type))
return ExprError();
InitializedEntity Entity = InitializedEntity::InitializeTemporary(TInfo);
InitializationKind Kind =
Exprs.size() ? ListInitialization
? InitializationKind::CreateDirectList(TyBeginLoc)
: InitializationKind::CreateDirect(TyBeginLoc, LParenLoc, RParenLoc)
: InitializationKind::CreateValue(TyBeginLoc, LParenLoc, RParenLoc);
InitializationSequence InitSeq(*this, Entity, Kind, Exprs);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Exprs);
if (Result.isInvalid() || !ListInitialization)
return Result;
Expr *Inner = Result.get();
if (CXXBindTemporaryExpr *BTE = dyn_cast_or_null<CXXBindTemporaryExpr>(Inner))
Inner = BTE->getSubExpr();
if (isa<InitListExpr>(Inner)) {
// If the list-initialization doesn't involve a constructor call, we'll get
// the initializer-list (with corrected type) back, but that's not what we
// want, since it will be treated as an initializer list in further
// processing. Explicitly insert a cast here.
QualType ResultType = Result.get()->getType();
Result = Owned(CXXFunctionalCastExpr::Create(
Context, ResultType, Expr::getValueKindForType(TInfo->getType()), TInfo,
CK_NoOp, Result.take(), /*Path=*/ 0, LParenLoc, RParenLoc));
}
// FIXME: Improve AST representation?
return Result;
}
/// doesUsualArrayDeleteWantSize - Answers whether the usual
/// operator delete[] for the given type has a size_t parameter.
static bool doesUsualArrayDeleteWantSize(Sema &S, SourceLocation loc,
QualType allocType) {
const RecordType *record =
allocType->getBaseElementTypeUnsafe()->getAs<RecordType>();
if (!record) return false;
// Try to find an operator delete[] in class scope.
DeclarationName deleteName =
S.Context.DeclarationNames.getCXXOperatorName(OO_Array_Delete);
LookupResult ops(S, deleteName, loc, Sema::LookupOrdinaryName);
S.LookupQualifiedName(ops, record->getDecl());
// We're just doing this for information.
ops.suppressDiagnostics();
// Very likely: there's no operator delete[].
if (ops.empty()) return false;
// If it's ambiguous, it should be illegal to call operator delete[]
// on this thing, so it doesn't matter if we allocate extra space or not.
if (ops.isAmbiguous()) return false;
LookupResult::Filter filter = ops.makeFilter();
while (filter.hasNext()) {
NamedDecl *del = filter.next()->getUnderlyingDecl();
// C++0x [basic.stc.dynamic.deallocation]p2:
// A template instance is never a usual deallocation function,
// regardless of its signature.
if (isa<FunctionTemplateDecl>(del)) {
filter.erase();
continue;
}
// C++0x [basic.stc.dynamic.deallocation]p2:
// If class T does not declare [an operator delete[] with one
// parameter] but does declare a member deallocation function
// named operator delete[] with exactly two parameters, the
// second of which has type std::size_t, then this function
// is a usual deallocation function.
if (!cast<CXXMethodDecl>(del)->isUsualDeallocationFunction()) {
filter.erase();
continue;
}
}
filter.done();
if (!ops.isSingleResult()) return false;
const FunctionDecl *del = cast<FunctionDecl>(ops.getFoundDecl());
return (del->getNumParams() == 2);
}
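// An assumed illustration of what this predicate detects: if the class's
// usual operator delete[] takes a second std::size_t parameter, array new
// must allocate space for an element-count cookie so the size can be passed
// back at deallocation time.
//
//   struct A {
//     void operator delete[](void *p, std::size_t n);  // "wants size"
//   };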
/// \brief Parsed a C++ 'new' expression (C++ 5.3.4).
///
/// E.g.:
/// @code new (memory) int[size][4] @endcode
/// or
/// @code ::new Foo(23, "hello") @endcode
///
/// \param StartLoc The first location of the expression.
/// \param UseGlobal True if 'new' was prefixed with '::'.
/// \param PlacementLParen Opening paren of the placement arguments.
/// \param PlacementArgs Placement new arguments.
/// \param PlacementRParen Closing paren of the placement arguments.
/// \param TypeIdParens If the type is in parens, the source range.
/// \param D The type to be allocated, as well as array dimensions.
/// \param Initializer The initializing expression or initializer-list, or null
/// if there is none.
ExprResult
Sema::ActOnCXXNew(SourceLocation StartLoc, bool UseGlobal,
SourceLocation PlacementLParen, MultiExprArg PlacementArgs,
SourceLocation PlacementRParen, SourceRange TypeIdParens,
Declarator &D, Expr *Initializer) {
bool TypeContainsAuto = D.getDeclSpec().containsPlaceholderType();
Expr *ArraySize = 0;
// If the specified type is an array, unwrap it and save the expression.
if (D.getNumTypeObjects() > 0 &&
D.getTypeObject(0).Kind == DeclaratorChunk::Array) {
DeclaratorChunk &Chunk = D.getTypeObject(0);
if (TypeContainsAuto)
return ExprError(Diag(Chunk.Loc, diag::err_new_array_of_auto)
<< D.getSourceRange());
if (Chunk.Arr.hasStatic)
return ExprError(Diag(Chunk.Loc, diag::err_static_illegal_in_new)
<< D.getSourceRange());
if (!Chunk.Arr.NumElts)
return ExprError(Diag(Chunk.Loc, diag::err_array_new_needs_size)
<< D.getSourceRange());
ArraySize = static_cast<Expr*>(Chunk.Arr.NumElts);
D.DropFirstTypeObject();
}
// Every dimension shall be of constant size.
if (ArraySize) {
for (unsigned I = 0, N = D.getNumTypeObjects(); I < N; ++I) {
if (D.getTypeObject(I).Kind != DeclaratorChunk::Array)
break;
DeclaratorChunk::ArrayTypeInfo &Array = D.getTypeObject(I).Arr;
if (Expr *NumElts = (Expr *)Array.NumElts) {
if (!NumElts->isTypeDependent() && !NumElts->isValueDependent()) {
if (getLangOpts().CPlusPlus1y) {
// C++1y [expr.new]p6: Every constant-expression in a noptr-new-declarator
// shall be a converted constant expression (5.19) of type std::size_t
// and shall evaluate to a strictly positive value.
unsigned IntWidth = Context.getTargetInfo().getIntWidth();
assert(IntWidth && "Builtin type of size 0?");
llvm::APSInt Value(IntWidth);
Array.NumElts
= CheckConvertedConstantExpression(NumElts, Context.getSizeType(), Value,
CCEK_NewExpr)
.take();
} else {
Array.NumElts
= VerifyIntegerConstantExpression(NumElts, 0,
diag::err_new_array_nonconst)
.take();
}
if (!Array.NumElts)
return ExprError();
}
}
}
}
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, /*Scope=*/0);
QualType AllocType = TInfo->getType();
if (D.isInvalidType())
return ExprError();
SourceRange DirectInitRange;
if (ParenListExpr *List = dyn_cast_or_null<ParenListExpr>(Initializer))
DirectInitRange = List->getSourceRange();
return BuildCXXNew(SourceRange(StartLoc, D.getLocEnd()), UseGlobal,
PlacementLParen,
PlacementArgs,
PlacementRParen,
TypeIdParens,
AllocType,
TInfo,
ArraySize,
DirectInitRange,
Initializer,
TypeContainsAuto);
}
static bool isLegalArrayNewInitializer(CXXNewExpr::InitializationStyle Style,
Expr *Init) {
if (!Init)
return true;
if (ParenListExpr *PLE = dyn_cast<ParenListExpr>(Init))
return PLE->getNumExprs() == 0;
if (isa<ImplicitValueInitExpr>(Init))
return true;
else if (CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(Init))
return !CCE->isListInitialization() &&
CCE->getConstructor()->isDefaultConstructor();
else if (Style == CXXNewExpr::ListInit) {
assert(isa<InitListExpr>(Init) &&
"Shouldn't create list CXXConstructExprs for arrays.");
return true;
}
return false;
}
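// A hedged sketch of the initializers this predicate allows for array new
// (assumed examples, not from the original source):
//
//   new int[n]()       // OK: empty parentheses, value-initialization
//   new int[n]{1, 2}   // OK: C++11 list-initialization
//   new int[n](1, 2)   // rejected below via err_new_array_init_args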
ExprResult
Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
SourceLocation PlacementLParen,
MultiExprArg PlacementArgs,
SourceLocation PlacementRParen,
SourceRange TypeIdParens,
QualType AllocType,
TypeSourceInfo *AllocTypeInfo,
Expr *ArraySize,
SourceRange DirectInitRange,
Expr *Initializer,
bool TypeMayContainAuto) {
SourceRange TypeRange = AllocTypeInfo->getTypeLoc().getSourceRange();
SourceLocation StartLoc = Range.getBegin();
CXXNewExpr::InitializationStyle initStyle;
if (DirectInitRange.isValid()) {
assert(Initializer && "Have parens but no initializer.");
initStyle = CXXNewExpr::CallInit;
} else if (Initializer && isa<InitListExpr>(Initializer))
initStyle = CXXNewExpr::ListInit;
else {
assert((!Initializer || isa<ImplicitValueInitExpr>(Initializer) ||
isa<CXXConstructExpr>(Initializer)) &&
"Initializer expression that cannot have been implicitly created.");
initStyle = CXXNewExpr::NoInit;
}
Expr **Inits = &Initializer;
unsigned NumInits = Initializer ? 1 : 0;
if (ParenListExpr *List = dyn_cast_or_null<ParenListExpr>(Initializer)) {
assert(initStyle == CXXNewExpr::CallInit && "paren init for non-call init");
Inits = List->getExprs();
NumInits = List->getNumExprs();
}
// Determine whether we've already built the initializer.
bool HaveCompleteInit = false;
if (Initializer && isa<CXXConstructExpr>(Initializer) &&
!isa<CXXTemporaryObjectExpr>(Initializer))
HaveCompleteInit = true;
else if (Initializer && isa<ImplicitValueInitExpr>(Initializer))
HaveCompleteInit = true;
// C++11 [decl.spec.auto]p6. Deduce the type which 'auto' stands in for.
if (TypeMayContainAuto && AllocType->isUndeducedType()) {
if (initStyle == CXXNewExpr::NoInit || NumInits == 0)
return ExprError(Diag(StartLoc, diag::err_auto_new_requires_ctor_arg)
<< AllocType << TypeRange);
if (initStyle == CXXNewExpr::ListInit)
return ExprError(Diag(Inits[0]->getLocStart(),
diag::err_auto_new_requires_parens)
<< AllocType << TypeRange);
if (NumInits > 1) {
Expr *FirstBad = Inits[1];
return ExprError(Diag(FirstBad->getLocStart(),
diag::err_auto_new_ctor_multiple_expressions)
<< AllocType << TypeRange);
}
Expr *Deduce = Inits[0];
QualType DeducedType;
if (DeduceAutoType(AllocTypeInfo, Deduce, DeducedType) == DAR_Failed)
return ExprError(Diag(StartLoc, diag::err_auto_new_deduction_failure)
<< AllocType << Deduce->getType()
<< TypeRange << Deduce->getSourceRange());
if (DeducedType.isNull())
return ExprError();
AllocType = DeducedType;
}
// Per C++0x [expr.new]p5, the type being constructed may be a
// typedef of an array type.
if (!ArraySize) {
if (const ConstantArrayType *Array
= Context.getAsConstantArrayType(AllocType)) {
ArraySize = IntegerLiteral::Create(Context, Array->getSize(),
Context.getSizeType(),
TypeRange.getEnd());
AllocType = Array->getElementType();
}
}
if (CheckAllocatedType(AllocType, TypeRange.getBegin(), TypeRange))
return ExprError();
if (initStyle == CXXNewExpr::ListInit && isStdInitializerList(AllocType, 0)) {
Diag(AllocTypeInfo->getTypeLoc().getBeginLoc(),
diag::warn_dangling_std_initializer_list)
<< /*at end of FE*/0 << Inits[0]->getSourceRange();
}
// In ARC, infer 'retaining' lifetime for the allocated type.
if (getLangOpts().ObjCAutoRefCount &&
AllocType.getObjCLifetime() == Qualifiers::OCL_None &&
AllocType->isObjCLifetimeType()) {
AllocType = Context.getLifetimeQualifiedType(AllocType,
AllocType->getObjCARCImplicitLifetime());
}
QualType ResultType = Context.getPointerType(AllocType);
if (ArraySize && ArraySize->getType()->isNonOverloadPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(ArraySize);
if (result.isInvalid()) return ExprError();
ArraySize = result.take();
}
// C++98 5.3.4p6: "The expression in a direct-new-declarator shall have
// integral or enumeration type with a non-negative value."
// C++11 [expr.new]p6: The expression [...] shall be of integral or unscoped
// enumeration type, or a class type for which a single non-explicit
// conversion function to integral or unscoped enumeration type exists.
// C++1y [expr.new]p6: The expression [...] is implicitly converted to
// std::size_t.
if (ArraySize && !ArraySize->isTypeDependent()) {
ExprResult ConvertedSize;
if (getLangOpts().CPlusPlus1y) {
unsigned IntWidth = Context.getTargetInfo().getIntWidth();
assert(IntWidth && "Builtin type of size 0?");
llvm::APSInt Value(IntWidth);
ConvertedSize = PerformImplicitConversion(ArraySize, Context.getSizeType(),
AA_Converting);
if (!ConvertedSize.isInvalid() &&
ArraySize->getType()->getAs<RecordType>())
// Diagnose the compatibility of this conversion.
Diag(StartLoc, diag::warn_cxx98_compat_array_size_conversion)
<< ArraySize->getType() << 0 << "'size_t'";
} else {
class SizeConvertDiagnoser : public ICEConvertDiagnoser {
protected:
Expr *ArraySize;
public:
SizeConvertDiagnoser(Expr *ArraySize)
: ICEConvertDiagnoser(/*AllowScopedEnumerations*/false, false, false),
ArraySize(ArraySize) {}
virtual SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc,
QualType T) {
return S.Diag(Loc, diag::err_array_size_not_integral)
<< S.getLangOpts().CPlusPlus11 << T;
}
virtual SemaDiagnosticBuilder diagnoseIncomplete(
Sema &S, SourceLocation Loc, QualType T) {
return S.Diag(Loc, diag::err_array_size_incomplete_type)
<< T << ArraySize->getSourceRange();
}
virtual SemaDiagnosticBuilder diagnoseExplicitConv(
Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) {
return S.Diag(Loc, diag::err_array_size_explicit_conversion) << T << ConvTy;
}
virtual SemaDiagnosticBuilder noteExplicitConv(
Sema &S, CXXConversionDecl *Conv, QualType ConvTy) {
return S.Diag(Conv->getLocation(), diag::note_array_size_conversion)
<< ConvTy->isEnumeralType() << ConvTy;
}
virtual SemaDiagnosticBuilder diagnoseAmbiguous(
Sema &S, SourceLocation Loc, QualType T) {
return S.Diag(Loc, diag::err_array_size_ambiguous_conversion) << T;
}
virtual SemaDiagnosticBuilder noteAmbiguous(
Sema &S, CXXConversionDecl *Conv, QualType ConvTy) {
return S.Diag(Conv->getLocation(), diag::note_array_size_conversion)
<< ConvTy->isEnumeralType() << ConvTy;
}
virtual SemaDiagnosticBuilder diagnoseConversion(
Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) {
return S.Diag(Loc,
S.getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_array_size_conversion
: diag::ext_array_size_conversion)
<< T << ConvTy->isEnumeralType() << ConvTy;
}
} SizeDiagnoser(ArraySize);
ConvertedSize = PerformContextualImplicitConversion(StartLoc, ArraySize,
SizeDiagnoser);
}
if (ConvertedSize.isInvalid())
return ExprError();
ArraySize = ConvertedSize.take();
QualType SizeType = ArraySize->getType();
if (!SizeType->isIntegralOrUnscopedEnumerationType())
return ExprError();
// C++98 [expr.new]p7:
// The expression in a direct-new-declarator shall have integral type
// with a non-negative value.
//
// Let's see if this is a constant < 0. If so, we reject it out of
// hand. Otherwise, if it's not a constant, we must have an unparenthesized
// array type.
//
// Note: such a construct has well-defined semantics in C++11: it throws
// std::bad_array_new_length.
if (!ArraySize->isValueDependent()) {
llvm::APSInt Value;
// We've already performed any required implicit conversion to integer or
// unscoped enumeration type.
if (ArraySize->isIntegerConstantExpr(Value, Context)) {
if (Value < llvm::APSInt(
llvm::APInt::getNullValue(Value.getBitWidth()),
Value.isUnsigned())) {
if (getLangOpts().CPlusPlus11)
Diag(ArraySize->getLocStart(),
diag::warn_typecheck_negative_array_new_size)
<< ArraySize->getSourceRange();
else
return ExprError(Diag(ArraySize->getLocStart(),
diag::err_typecheck_negative_array_size)
<< ArraySize->getSourceRange());
} else if (!AllocType->isDependentType()) {
unsigned ActiveSizeBits =
ConstantArrayType::getNumAddressingBits(Context, AllocType, Value);
if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) {
if (getLangOpts().CPlusPlus11)
Diag(ArraySize->getLocStart(),
diag::warn_array_new_too_large)
<< Value.toString(10)
<< ArraySize->getSourceRange();
else
return ExprError(Diag(ArraySize->getLocStart(),
diag::err_array_too_large)
<< Value.toString(10)
<< ArraySize->getSourceRange());
}
}
} else if (TypeIdParens.isValid()) {
// Can't have dynamic array size when the type-id is in parentheses.
Diag(ArraySize->getLocStart(), diag::ext_new_paren_array_nonconst)
<< ArraySize->getSourceRange()
<< FixItHint::CreateRemoval(TypeIdParens.getBegin())
<< FixItHint::CreateRemoval(TypeIdParens.getEnd());
TypeIdParens = SourceRange();
}
}
// Note that we do *not* convert the argument in any way. It can
// be signed, larger than size_t, whatever.
}
FunctionDecl *OperatorNew = 0;
FunctionDecl *OperatorDelete = 0;
if (!AllocType->isDependentType() &&
!Expr::hasAnyTypeDependentArguments(PlacementArgs) &&
FindAllocationFunctions(StartLoc,
SourceRange(PlacementLParen, PlacementRParen),
UseGlobal, AllocType, ArraySize, PlacementArgs,
OperatorNew, OperatorDelete))
return ExprError();
// If this is an array allocation, compute whether the usual array
// deallocation function for the type has a size_t parameter.
bool UsualArrayDeleteWantsSize = false;
if (ArraySize && !AllocType->isDependentType())
UsualArrayDeleteWantsSize
= doesUsualArrayDeleteWantSize(*this, StartLoc, AllocType);
SmallVector<Expr *, 8> AllPlaceArgs;
if (OperatorNew) {
// Add default arguments, if any.
const FunctionProtoType *Proto =
OperatorNew->getType()->getAs<FunctionProtoType>();
VariadicCallType CallType =
Proto->isVariadic() ? VariadicFunction : VariadicDoesNotApply;
if (GatherArgumentsForCall(PlacementLParen, OperatorNew, Proto, 1,
PlacementArgs, AllPlaceArgs, CallType))
return ExprError();
if (!AllPlaceArgs.empty())
PlacementArgs = AllPlaceArgs;
DiagnoseSentinelCalls(OperatorNew, PlacementLParen, PlacementArgs);
// FIXME: Missing call to CheckFunctionCall or equivalent
}
// Warn if the type is over-aligned and is being allocated by global operator
// new.
if (PlacementArgs.empty() && OperatorNew &&
(OperatorNew->isImplicit() ||
getSourceManager().isInSystemHeader(OperatorNew->getLocStart()))) {
if (unsigned Align = Context.getPreferredTypeAlign(AllocType.getTypePtr())){
unsigned SuitableAlign = Context.getTargetInfo().getSuitableAlign();
if (Align > SuitableAlign)
Diag(StartLoc, diag::warn_overaligned_type)
<< AllocType
<< unsigned(Align / Context.getCharWidth())
<< unsigned(SuitableAlign / Context.getCharWidth());
}
}
QualType InitType = AllocType;
// Array 'new' can't have any initializers except empty parentheses.
// Initializer lists are also allowed, in C++11. Rely on the parser for the
// dialect distinction.
if (ResultType->isArrayType() || ArraySize) {
if (!isLegalArrayNewInitializer(initStyle, Initializer)) {
SourceRange InitRange(Inits[0]->getLocStart(),
Inits[NumInits - 1]->getLocEnd());
Diag(StartLoc, diag::err_new_array_init_args) << InitRange;
return ExprError();
}
if (InitListExpr *ILE = dyn_cast_or_null<InitListExpr>(Initializer)) {
// We do the initialization typechecking against the array type
// corresponding to the number of initializers + 1 (to also check
// default-initialization).
unsigned NumElements = ILE->getNumInits() + 1;
InitType = Context.getConstantArrayType(AllocType,
llvm::APInt(Context.getTypeSize(Context.getSizeType()), NumElements),
ArrayType::Normal, 0);
}
}
// If we can perform the initialization, and we've not already done so,
// do it now.
if (!AllocType->isDependentType() &&
!Expr::hasAnyTypeDependentArguments(
llvm::makeArrayRef(Inits, NumInits)) &&
!HaveCompleteInit) {
// C++11 [expr.new]p15:
// A new-expression that creates an object of type T initializes that
// object as follows:
InitializationKind Kind
// - If the new-initializer is omitted, the object is default-
// initialized (8.5); if no initialization is performed,
// the object has indeterminate value
= initStyle == CXXNewExpr::NoInit
? InitializationKind::CreateDefault(TypeRange.getBegin())
// - Otherwise, the new-initializer is interpreted according to the
// initialization rules of 8.5 for direct-initialization.
: initStyle == CXXNewExpr::ListInit
? InitializationKind::CreateDirectList(TypeRange.getBegin())
: InitializationKind::CreateDirect(TypeRange.getBegin(),
DirectInitRange.getBegin(),
DirectInitRange.getEnd());
InitializedEntity Entity
= InitializedEntity::InitializeNew(StartLoc, InitType);
InitializationSequence InitSeq(*this, Entity, Kind, MultiExprArg(Inits, NumInits));
ExprResult FullInit = InitSeq.Perform(*this, Entity, Kind,
MultiExprArg(Inits, NumInits));
if (FullInit.isInvalid())
return ExprError();
// FullInit is our initializer; strip off CXXBindTemporaryExprs, because
// we don't want the initialized object to be destructed.
if (CXXBindTemporaryExpr *Binder =
dyn_cast_or_null<CXXBindTemporaryExpr>(FullInit.get()))
FullInit = Owned(Binder->getSubExpr());
Initializer = FullInit.take();
}
// Mark the new and delete operators as referenced.
if (OperatorNew) {
if (DiagnoseUseOfDecl(OperatorNew, StartLoc))
return ExprError();
MarkFunctionReferenced(StartLoc, OperatorNew);
}
if (OperatorDelete) {
if (DiagnoseUseOfDecl(OperatorDelete, StartLoc))
return ExprError();
MarkFunctionReferenced(StartLoc, OperatorDelete);
}
// C++0x [expr.new]p17:
// If the new expression creates an array of objects of class type,
// access and ambiguity control are done for the destructor.
QualType BaseAllocType = Context.getBaseElementType(AllocType);
if (ArraySize && !BaseAllocType->isDependentType()) {
if (const RecordType *BaseRecordType = BaseAllocType->getAs<RecordType>()) {
if (CXXDestructorDecl *dtor = LookupDestructor(
cast<CXXRecordDecl>(BaseRecordType->getDecl()))) {
MarkFunctionReferenced(StartLoc, dtor);
CheckDestructorAccess(StartLoc, dtor,
PDiag(diag::err_access_dtor)
<< BaseAllocType);
if (DiagnoseUseOfDecl(dtor, StartLoc))
return ExprError();
}
}
}
return Owned(new (Context) CXXNewExpr(Context, UseGlobal, OperatorNew,
OperatorDelete,
UsualArrayDeleteWantsSize,
PlacementArgs, TypeIdParens,
ArraySize, initStyle, Initializer,
ResultType, AllocTypeInfo,
Range, DirectInitRange));
}
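// Assumed examples of the 'auto' deduction handled above (diagnostic names
// refer to the checks earlier in this function):
//
//   new auto(42);     // OK: allocated type deduced as int
//   new auto{42};     // rejected: err_auto_new_requires_parens
//   new auto(1, 2);   // rejected: err_auto_new_ctor_multiple_expressions
//   new auto;         // rejected: err_auto_new_requires_ctor_arg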
/// \brief Checks that a type is suitable as the allocated type
/// in a new-expression.
bool Sema::CheckAllocatedType(QualType AllocType, SourceLocation Loc,
SourceRange R) {
// C++ 5.3.4p1: "[The] type shall be a complete object type, but not an
// abstract class type or array thereof."
if (AllocType->isFunctionType())
return Diag(Loc, diag::err_bad_new_type)
<< AllocType << 0 << R;
else if (AllocType->isReferenceType())
return Diag(Loc, diag::err_bad_new_type)
<< AllocType << 1 << R;
else if (!AllocType->isDependentType() &&
RequireCompleteType(Loc, AllocType, diag::err_new_incomplete_type,R))
return true;
else if (RequireNonAbstractType(Loc, AllocType,
diag::err_allocation_of_abstract_type))
return true;
else if (AllocType->isVariablyModifiedType())
return Diag(Loc, diag::err_variably_modified_new_type)
<< AllocType;
else if (unsigned AddressSpace = AllocType.getAddressSpace())
return Diag(Loc, diag::err_address_space_qualified_new)
<< AllocType.getUnqualifiedType() << AddressSpace;
else if (getLangOpts().ObjCAutoRefCount) {
if (const ArrayType *AT = Context.getAsArrayType(AllocType)) {
QualType BaseAllocType = Context.getBaseElementType(AT);
if (BaseAllocType.getObjCLifetime() == Qualifiers::OCL_None &&
BaseAllocType->isObjCLifetimeType())
return Diag(Loc, diag::err_arc_new_array_without_ownership)
<< BaseAllocType;
}
}
return false;
}
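// Assumed examples of allocated types rejected above:
//
//   typedef int &IR;   new IR;   // error: reference type
//   typedef int FT();  new FT;   // error: function type
//   struct Abstract { virtual void f() = 0; };
//   new Abstract;                // error: abstract class type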
/// \brief Determine whether the given function is a non-placement
/// deallocation function.
static bool isNonPlacementDeallocationFunction(Sema &S, FunctionDecl *FD) {
if (FD->isInvalidDecl())
return false;
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(FD))
return Method->isUsualDeallocationFunction();
if (FD->getOverloadedOperator() != OO_Delete &&
FD->getOverloadedOperator() != OO_Array_Delete)
return false;
if (FD->getNumParams() == 1)
return true;
return S.getLangOpts().SizedDeallocation && FD->getNumParams() == 2 &&
S.Context.hasSameUnqualifiedType(FD->getParamDecl(1)->getType(),
S.Context.getSizeType());
}
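// For illustration (assumed declarations): with -fsized-deallocation enabled,
// both of these global forms qualify as non-placement deallocation functions,
// which is why the two-parameter case is accepted above.
//
//   void operator delete(void *p) noexcept;
//   void operator delete(void *p, std::size_t n) noexcept;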
/// FindAllocationFunctions - Finds the overloads of operator new and delete
/// that are appropriate for the allocation.
bool Sema::FindAllocationFunctions(SourceLocation StartLoc, SourceRange Range,
bool UseGlobal, QualType AllocType,
bool IsArray, MultiExprArg PlaceArgs,
FunctionDecl *&OperatorNew,
FunctionDecl *&OperatorDelete) {
// --- Choosing an allocation function ---
// C++ 5.3.4p8 - 14 & 18
// 1) If UseGlobal is true, only look in the global scope. Else, also look
// in the scope of the allocated class.
// 2) If an array size is given, look for operator new[], else look for
// operator new.
// 3) The first argument is always size_t. Append the arguments from the
// placement form.
SmallVector<Expr*, 8> AllocArgs(1 + PlaceArgs.size());
// We don't care about the actual value of this argument.
// FIXME: Should the Sema create the expression and embed it in the syntax
// tree? Or should the consumer just recalculate the value?
IntegerLiteral Size(Context, llvm::APInt::getNullValue(
Context.getTargetInfo().getPointerWidth(0)),
Context.getSizeType(),
SourceLocation());
AllocArgs[0] = &Size;
std::copy(PlaceArgs.begin(), PlaceArgs.end(), AllocArgs.begin() + 1);
// C++ [expr.new]p8:
// If the allocated type is a non-array type, the allocation
// function's name is operator new and the deallocation function's
// name is operator delete. If the allocated type is an array
// type, the allocation function's name is operator new[] and the
// deallocation function's name is operator delete[].
DeclarationName NewName = Context.DeclarationNames.getCXXOperatorName(
IsArray ? OO_Array_New : OO_New);
DeclarationName DeleteName = Context.DeclarationNames.getCXXOperatorName(
IsArray ? OO_Array_Delete : OO_Delete);
QualType AllocElemType = Context.getBaseElementType(AllocType);
if (AllocElemType->isRecordType() && !UseGlobal) {
CXXRecordDecl *Record
= cast<CXXRecordDecl>(AllocElemType->getAs<RecordType>()->getDecl());
if (FindAllocationOverload(StartLoc, Range, NewName, AllocArgs, Record,
/*AllowMissing=*/true, OperatorNew))
return true;
}
if (!OperatorNew) {
// Didn't find a member overload. Look for a global one.
DeclareGlobalNewDelete();
DeclContext *TUDecl = Context.getTranslationUnitDecl();
bool FallbackEnabled = IsArray && Context.getLangOpts().MicrosoftMode;
if (FindAllocationOverload(StartLoc, Range, NewName, AllocArgs, TUDecl,
/*AllowMissing=*/FallbackEnabled, OperatorNew,
/*Diagnose=*/!FallbackEnabled)) {
if (!FallbackEnabled)
return true;
// MSVC will fall back on trying to find a matching global operator new
// if operator new[] cannot be found. Also, MSVC will leak by not
// generating a call to operator delete or operator delete[], but we
// will not replicate that bug.
NewName = Context.DeclarationNames.getCXXOperatorName(OO_New);
DeleteName = Context.DeclarationNames.getCXXOperatorName(OO_Delete);
if (FindAllocationOverload(StartLoc, Range, NewName, AllocArgs, TUDecl,
/*AllowMissing=*/false, OperatorNew))
return true;
}
}
// We don't need an operator delete if we're running under
// -fno-exceptions.
if (!getLangOpts().Exceptions) {
OperatorDelete = 0;
return false;
}
// FindAllocationOverload can change the passed in arguments, so we need to
// copy them back.
if (!PlaceArgs.empty())
std::copy(AllocArgs.begin() + 1, AllocArgs.end(), PlaceArgs.data());
// C++ [expr.new]p19:
//
// If the new-expression begins with a unary :: operator, the
// deallocation function's name is looked up in the global
// scope. Otherwise, if the allocated type is a class type T or an
// array thereof, the deallocation function's name is looked up in
// the scope of T. If this lookup fails to find the name, or if
// the allocated type is not a class type or array thereof, the
// deallocation function's name is looked up in the global scope.
LookupResult FoundDelete(*this, DeleteName, StartLoc, LookupOrdinaryName);
if (AllocElemType->isRecordType() && !UseGlobal) {
CXXRecordDecl *RD
= cast<CXXRecordDecl>(AllocElemType->getAs<RecordType>()->getDecl());
LookupQualifiedName(FoundDelete, RD);
}
if (FoundDelete.isAmbiguous())
return true; // FIXME: clean up expressions?
if (FoundDelete.empty()) {
DeclareGlobalNewDelete();
LookupQualifiedName(FoundDelete, Context.getTranslationUnitDecl());
}
FoundDelete.suppressDiagnostics();
SmallVector<std::pair<DeclAccessPair,FunctionDecl*>, 2> Matches;
// Whether we're looking for a placement operator delete is dictated
// by whether we selected a placement operator new, not by whether
// we had explicit placement arguments. This matters for things like
// struct A { void *operator new(size_t, int = 0); ... };
// A *a = new A()
bool isPlacementNew = (!PlaceArgs.empty() || OperatorNew->param_size() != 1);
if (isPlacementNew) {
// C++ [expr.new]p20:
// A declaration of a placement deallocation function matches the
// declaration of a placement allocation function if it has the
// same number of parameters and, after parameter transformations
// (8.3.5), all parameter types except the first are
// identical. [...]
//
// To perform this comparison, we compute the function type that
// the deallocation function should have, and use that type both
// for template argument deduction and for comparison purposes.
//
// FIXME: this comparison should ignore CC and the like.
QualType ExpectedFunctionType;
{
const FunctionProtoType *Proto
= OperatorNew->getType()->getAs<FunctionProtoType>();
SmallVector<QualType, 4> ArgTypes;
ArgTypes.push_back(Context.VoidPtrTy);
for (unsigned I = 1, N = Proto->getNumArgs(); I < N; ++I)
ArgTypes.push_back(Proto->getArgType(I));
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = Proto->isVariadic();
ExpectedFunctionType
= Context.getFunctionType(Context.VoidTy, ArgTypes, EPI);
}
for (LookupResult::iterator D = FoundDelete.begin(),
DEnd = FoundDelete.end();
D != DEnd; ++D) {
FunctionDecl *Fn = 0;
if (FunctionTemplateDecl *FnTmpl
= dyn_cast<FunctionTemplateDecl>((*D)->getUnderlyingDecl())) {
// Perform template argument deduction to try to match the
// expected function type.
TemplateDeductionInfo Info(StartLoc);
if (DeduceTemplateArguments(FnTmpl, 0, ExpectedFunctionType, Fn, Info))
continue;
} else
Fn = cast<FunctionDecl>((*D)->getUnderlyingDecl());
if (Context.hasSameType(Fn->getType(), ExpectedFunctionType))
Matches.push_back(std::make_pair(D.getPair(), Fn));
}
} else {
// C++ [expr.new]p20:
// [...] Any non-placement deallocation function matches a
// non-placement allocation function. [...]
for (LookupResult::iterator D = FoundDelete.begin(),
DEnd = FoundDelete.end();
D != DEnd; ++D) {
if (FunctionDecl *Fn = dyn_cast<FunctionDecl>((*D)->getUnderlyingDecl()))
if (isNonPlacementDeallocationFunction(*this, Fn))
Matches.push_back(std::make_pair(D.getPair(), Fn));
}
// C++1y [expr.new]p22:
// For a non-placement allocation function, the normal deallocation
// function lookup is used
// C++1y [expr.delete]p?:
// If [...] deallocation function lookup finds both a usual deallocation
// function with only a pointer parameter and a usual deallocation
// function with both a pointer parameter and a size parameter, then the
// selected deallocation function shall be the one with two parameters.
// Otherwise, the selected deallocation function shall be the function
// with one parameter.
if (getLangOpts().SizedDeallocation && Matches.size() == 2) {
if (Matches[0].second->getNumParams() == 1)
Matches.erase(Matches.begin());
else
Matches.erase(Matches.begin() + 1);
assert(Matches[0].second->getNumParams() == 2 &&
"found an unexpected uusal deallocation function");
}
}
// C++ [expr.new]p20:
// [...] If the lookup finds a single matching deallocation
// function, that function will be called; otherwise, no
// deallocation function will be called.
if (Matches.size() == 1) {
OperatorDelete = Matches[0].second;
// C++0x [expr.new]p20:
// If the lookup finds the two-parameter form of a usual
// deallocation function (3.7.4.2) and that function, considered
// as a placement deallocation function, would have been
// selected as a match for the allocation function, the program
// is ill-formed.
if (!PlaceArgs.empty() && getLangOpts().CPlusPlus11 &&
isNonPlacementDeallocationFunction(*this, OperatorDelete)) {
Diag(StartLoc, diag::err_placement_new_non_placement_delete)
<< SourceRange(PlaceArgs.front()->getLocStart(),
PlaceArgs.back()->getLocEnd());
if (!OperatorDelete->isImplicit())
Diag(OperatorDelete->getLocation(), diag::note_previous_decl)
<< DeleteName;
} else {
CheckAllocationAccess(StartLoc, Range, FoundDelete.getNamingClass(),
Matches[0].first);
}
}
return false;
}
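// A hedged sketch of the placement-matching rule above ('Arena' is a
// hypothetical type): the placement operator delete is selected because its
// parameters after the first match those of the chosen operator new.
//
//   struct Arena;
//   struct B {
//     void *operator new(std::size_t, Arena &);
//     void operator delete(void *, Arena &);  // matching placement delete
//   };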
/// FindAllocationOverload - Find a fitting overload for the allocation
/// function in the specified scope.
bool Sema::FindAllocationOverload(SourceLocation StartLoc, SourceRange Range,
DeclarationName Name, MultiExprArg Args,
DeclContext *Ctx,
bool AllowMissing, FunctionDecl *&Operator,
bool Diagnose) {
LookupResult R(*this, Name, StartLoc, LookupOrdinaryName);
LookupQualifiedName(R, Ctx);
if (R.empty()) {
if (AllowMissing || !Diagnose)
return false;
return Diag(StartLoc, diag::err_ovl_no_viable_function_in_call)
<< Name << Range;
}
if (R.isAmbiguous())
return true;
R.suppressDiagnostics();
OverloadCandidateSet Candidates(StartLoc);
for (LookupResult::iterator Alloc = R.begin(), AllocEnd = R.end();
Alloc != AllocEnd; ++Alloc) {
// Even member operator new/delete are implicitly treated as
// static, so don't use AddMemberCandidate.
NamedDecl *D = (*Alloc)->getUnderlyingDecl();
if (FunctionTemplateDecl *FnTemplate = dyn_cast<FunctionTemplateDecl>(D)) {
AddTemplateOverloadCandidate(FnTemplate, Alloc.getPair(),
/*ExplicitTemplateArgs=*/0,
Args, Candidates,
/*SuppressUserConversions=*/false);
continue;
}
FunctionDecl *Fn = cast<FunctionDecl>(D);
AddOverloadCandidate(Fn, Alloc.getPair(), Args, Candidates,
/*SuppressUserConversions=*/false);
}
// Do the resolution.
OverloadCandidateSet::iterator Best;
switch (Candidates.BestViableFunction(*this, StartLoc, Best)) {
case OR_Success: {
// Got one!
FunctionDecl *FnDecl = Best->Function;
MarkFunctionReferenced(StartLoc, FnDecl);
// The first argument is size_t, and the first parameter must be size_t,
// too. This is checked on declaration and can be assumed. (It can't be
// asserted on, though, since invalid decls are left in there.)
// Watch out for variadic allocator functions.
unsigned NumArgsInFnDecl = FnDecl->getNumParams();
for (unsigned i = 0; (i < Args.size() && i < NumArgsInFnDecl); ++i) {
InitializedEntity Entity = InitializedEntity::InitializeParameter(Context,
FnDecl->getParamDecl(i));
if (!Diagnose && !CanPerformCopyInitialization(Entity, Owned(Args[i])))
return true;
ExprResult Result
= PerformCopyInitialization(Entity, SourceLocation(), Owned(Args[i]));
if (Result.isInvalid())
return true;
Args[i] = Result.takeAs<Expr>();
}
Operator = FnDecl;
if (CheckAllocationAccess(StartLoc, Range, R.getNamingClass(),
Best->FoundDecl, Diagnose) == AR_inaccessible)
return true;
return false;
}
case OR_No_Viable_Function:
if (Diagnose) {
Diag(StartLoc, diag::err_ovl_no_viable_function_in_call)
<< Name << Range;
Candidates.NoteCandidates(*this, OCD_AllCandidates, Args);
}
return true;
case OR_Ambiguous:
if (Diagnose) {
Diag(StartLoc, diag::err_ovl_ambiguous_call)
<< Name << Range;
Candidates.NoteCandidates(*this, OCD_ViableCandidates, Args);
}
return true;
case OR_Deleted: {
if (Diagnose) {
Diag(StartLoc, diag::err_ovl_deleted_call)
<< Best->Function->isDeleted()
<< Name
<< getDeletedOrUnavailableSuffix(Best->Function)
<< Range;
Candidates.NoteCandidates(*this, OCD_AllCandidates, Args);
}
return true;
}
}
llvm_unreachable("Unreachable, bad result from BestViableFunction");
}
/// DeclareGlobalNewDelete - Declare the global forms of operator new and
/// delete. These are:
/// @code
/// // C++03:
/// void* operator new(std::size_t) throw(std::bad_alloc);
/// void* operator new[](std::size_t) throw(std::bad_alloc);
/// void operator delete(void *) throw();
/// void operator delete[](void *) throw();
/// // C++11:
/// void* operator new(std::size_t);
/// void* operator new[](std::size_t);
/// void operator delete(void *) noexcept;
/// void operator delete[](void *) noexcept;
/// // C++1y:
/// void* operator new(std::size_t);
/// void* operator new[](std::size_t);
/// void operator delete(void *) noexcept;
/// void operator delete[](void *) noexcept;
/// void operator delete(void *, std::size_t) noexcept;
/// void operator delete[](void *, std::size_t) noexcept;
/// @endcode
/// Note that the placement and nothrow forms of new are *not* implicitly
/// declared. Their use requires including \<new\>.
void Sema::DeclareGlobalNewDelete() {
if (GlobalNewDeleteDeclared)
return;
// C++ [basic.std.dynamic]p2:
// [...] The following allocation and deallocation functions (18.4) are
// implicitly declared in global scope in each translation unit of a
// program
//
// C++03:
// void* operator new(std::size_t) throw(std::bad_alloc);
// void* operator new[](std::size_t) throw(std::bad_alloc);
// void operator delete(void*) throw();
// void operator delete[](void*) throw();
// C++11:
// void* operator new(std::size_t);
// void* operator new[](std::size_t);
// void operator delete(void*) noexcept;
// void operator delete[](void*) noexcept;
// C++1y:
// void* operator new(std::size_t);
// void* operator new[](std::size_t);
// void operator delete(void*) noexcept;
// void operator delete[](void*) noexcept;
// void operator delete(void*, std::size_t) noexcept;
// void operator delete[](void*, std::size_t) noexcept;
//
// These implicit declarations introduce only the function names operator
// new, operator new[], operator delete, operator delete[].
//
// Here, we need to refer to std::bad_alloc, so we will implicitly declare
// "std" or "bad_alloc" as necessary to form the exception specification.
// However, we do not make these implicit declarations visible to name
// lookup.
if (!StdBadAlloc && !getLangOpts().CPlusPlus11) {
// The "std::bad_alloc" class has not yet been declared, so build it
// implicitly.
StdBadAlloc = CXXRecordDecl::Create(Context, TTK_Class,
getOrCreateStdNamespace(),
SourceLocation(), SourceLocation(),
&PP.getIdentifierTable().get("bad_alloc"),
0);
getStdBadAlloc()->setImplicit(true);
}
GlobalNewDeleteDeclared = true;
QualType VoidPtr = Context.getPointerType(Context.VoidTy);
QualType SizeT = Context.getSizeType();
bool AssumeSaneOperatorNew = getLangOpts().AssumeSaneOperatorNew;
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_New),
VoidPtr, SizeT, QualType(), AssumeSaneOperatorNew);
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Array_New),
VoidPtr, SizeT, QualType(), AssumeSaneOperatorNew);
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Delete),
Context.VoidTy, VoidPtr);
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Array_Delete),
Context.VoidTy, VoidPtr);
if (getLangOpts().SizedDeallocation) {
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Delete),
Context.VoidTy, VoidPtr, Context.getSizeType());
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Array_Delete),
Context.VoidTy, VoidPtr, Context.getSizeType());
}
}
/// DeclareGlobalAllocationFunction - Declares a single implicit global
/// allocation function if it doesn't already exist.
void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
QualType Return,
QualType Param1, QualType Param2,
bool AddMallocAttr) {
DeclContext *GlobalCtx = Context.getTranslationUnitDecl();
unsigned NumParams = Param2.isNull() ? 1 : 2;
// Check if this function is already declared.
DeclContext::lookup_result R = GlobalCtx->lookup(Name);
for (DeclContext::lookup_iterator Alloc = R.begin(), AllocEnd = R.end();
Alloc != AllocEnd; ++Alloc) {
// Only look at non-template functions, as it is the predefined,
// non-templated allocation function we are trying to declare here.
if (FunctionDecl *Func = dyn_cast<FunctionDecl>(*Alloc)) {
if (Func->getNumParams() == NumParams) {
QualType InitialParam1Type =
Context.getCanonicalType(Func->getParamDecl(0)
->getType().getUnqualifiedType());
QualType InitialParam2Type =
NumParams == 2
? Context.getCanonicalType(Func->getParamDecl(1)
->getType().getUnqualifiedType())
: QualType();
// FIXME: Do we need to check for default arguments here?
if (InitialParam1Type == Param1 &&
(NumParams == 1 || InitialParam2Type == Param2)) {
if (AddMallocAttr && !Func->hasAttr<MallocAttr>())
Func->addAttr(::new (Context) MallocAttr(SourceLocation(),
Context));
// Make the function visible to name lookup, even if we found it in
// an unimported module. It either is an implicitly-declared global
// allocation function, or is suppressing that function.
Func->setHidden(false);
return;
}
}
}
}
QualType BadAllocType;
bool HasBadAllocExceptionSpec
= (Name.getCXXOverloadedOperator() == OO_New ||
Name.getCXXOverloadedOperator() == OO_Array_New);
if (HasBadAllocExceptionSpec && !getLangOpts().CPlusPlus11) {
assert(StdBadAlloc && "Must have std::bad_alloc declared");
BadAllocType = Context.getTypeDeclType(getStdBadAlloc());
}
FunctionProtoType::ExtProtoInfo EPI;
if (HasBadAllocExceptionSpec) {
if (!getLangOpts().CPlusPlus11) {
EPI.ExceptionSpecType = EST_Dynamic;
EPI.NumExceptions = 1;
EPI.Exceptions = &BadAllocType;
}
} else {
EPI.ExceptionSpecType = getLangOpts().CPlusPlus11 ?
EST_BasicNoexcept : EST_DynamicNone;
}
QualType Params[] = { Param1, Param2 };
QualType FnType = Context.getFunctionType(
Return, ArrayRef<QualType>(Params, NumParams), EPI);
FunctionDecl *Alloc =
FunctionDecl::Create(Context, GlobalCtx, SourceLocation(),
SourceLocation(), Name,
FnType, /*TInfo=*/0, SC_None, false, true);
Alloc->setImplicit();
if (AddMallocAttr)
Alloc->addAttr(::new (Context) MallocAttr(SourceLocation(), Context));
ParmVarDecl *ParamDecls[2];
for (unsigned I = 0; I != NumParams; ++I)
ParamDecls[I] = ParmVarDecl::Create(Context, Alloc, SourceLocation(),
SourceLocation(), 0,
Params[I], /*TInfo=*/0,
SC_None, 0);
Alloc->setParams(ArrayRef<ParmVarDecl*>(ParamDecls, NumParams));
// FIXME: Also add this declaration to the IdentifierResolver, but
// make sure it is at the end of the chain to coincide with the
// global scope.
Context.getTranslationUnitDecl()->addDecl(Alloc);
}
FunctionDecl *Sema::FindUsualDeallocationFunction(SourceLocation StartLoc,
bool CanProvideSize,
DeclarationName Name) {
DeclareGlobalNewDelete();
LookupResult FoundDelete(*this, Name, StartLoc, LookupOrdinaryName);
LookupQualifiedName(FoundDelete, Context.getTranslationUnitDecl());
// C++ [expr.new]p20:
// [...] Any non-placement deallocation function matches a
// non-placement allocation function. [...]
llvm::SmallVector<FunctionDecl*, 2> Matches;
for (LookupResult::iterator D = FoundDelete.begin(),
DEnd = FoundDelete.end();
D != DEnd; ++D) {
if (FunctionDecl *Fn = dyn_cast<FunctionDecl>(*D))
if (isNonPlacementDeallocationFunction(*this, Fn))
Matches.push_back(Fn);
}
// C++1y [expr.delete]p?:
// If the type is complete and deallocation function lookup finds both a
// usual deallocation function with only a pointer parameter and a usual
// deallocation function with both a pointer parameter and a size
// parameter, then the selected deallocation function shall be the one
// with two parameters. Otherwise, the selected deallocation function
// shall be the function with one parameter.
if (getLangOpts().SizedDeallocation && Matches.size() == 2) {
unsigned NumArgs = CanProvideSize ? 2 : 1;
if (Matches[0]->getNumParams() != NumArgs)
Matches.erase(Matches.begin());
else
Matches.erase(Matches.begin() + 1);
assert(Matches[0]->getNumParams() == NumArgs &&
"found an unexpected uusal deallocation function");
}
assert(Matches.size() == 1 &&
"unexpectedly have multiple usual deallocation functions");
return Matches.front();
}
bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,
DeclarationName Name,
FunctionDecl* &Operator, bool Diagnose) {
LookupResult Found(*this, Name, StartLoc, LookupOrdinaryName);
// Try to find operator delete/operator delete[] in class scope.
LookupQualifiedName(Found, RD);
if (Found.isAmbiguous())
return true;
Found.suppressDiagnostics();
SmallVector<DeclAccessPair,4> Matches;
for (LookupResult::iterator F = Found.begin(), FEnd = Found.end();
F != FEnd; ++F) {
NamedDecl *ND = (*F)->getUnderlyingDecl();
// Ignore template operator delete members from the check for a usual
// deallocation function.
if (isa<FunctionTemplateDecl>(ND))
continue;
if (cast<CXXMethodDecl>(ND)->isUsualDeallocationFunction())
Matches.push_back(F.getPair());
}
// There's exactly one suitable operator; pick it.
if (Matches.size() == 1) {
Operator = cast<CXXMethodDecl>(Matches[0]->getUnderlyingDecl());
if (Operator->isDeleted()) {
if (Diagnose) {
Diag(StartLoc, diag::err_deleted_function_use);
NoteDeletedFunction(Operator);
}
return true;
}
if (CheckAllocationAccess(StartLoc, SourceRange(), Found.getNamingClass(),
Matches[0], Diagnose) == AR_inaccessible)
return true;
return false;
// We found multiple suitable operators; complain about the ambiguity.
} else if (!Matches.empty()) {
if (Diagnose) {
Diag(StartLoc, diag::err_ambiguous_suitable_delete_member_function_found)
<< Name << RD;
for (SmallVectorImpl<DeclAccessPair>::iterator
F = Matches.begin(), FEnd = Matches.end(); F != FEnd; ++F)
Diag((*F)->getUnderlyingDecl()->getLocation(),
diag::note_member_declared_here) << Name;
}
return true;
}
// We did find operator delete/operator delete[] declarations, but
// none of them were suitable.
if (!Found.empty()) {
if (Diagnose) {
Diag(StartLoc, diag::err_no_suitable_delete_member_function_found)
<< Name << RD;
for (LookupResult::iterator F = Found.begin(), FEnd = Found.end();
F != FEnd; ++F)
Diag((*F)->getUnderlyingDecl()->getLocation(),
diag::note_member_declared_here) << Name;
}
return true;
}
Operator = 0;
return false;
}
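// An assumed example of the deleted-function path above:
//
//   struct D {
//     void operator delete(void *) = delete;
//   };
//   // 'delete dp;' on a D* is diagnosed via err_deleted_function_use.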
/// ActOnCXXDelete - Parsed a C++ 'delete' expression (C++ 5.3.5), as in:
/// @code ::delete ptr; @endcode
/// or
/// @code delete [] ptr; @endcode
ExprResult
Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal,
bool ArrayForm, Expr *ExE) {
// C++ [expr.delete]p1:
// The operand shall have a pointer type, or a class type having a single
// non-explicit conversion function to a pointer type. The result has type
// void.
//
// DR599 amends "pointer type" to "pointer to object type" in both cases.
ExprResult Ex = Owned(ExE);
FunctionDecl *OperatorDelete = 0;
bool ArrayFormAsWritten = ArrayForm;
bool UsualArrayDeleteWantsSize = false;
if (!Ex.get()->isTypeDependent()) {
// Perform lvalue-to-rvalue cast, if needed.
Ex = DefaultLvalueConversion(Ex.take());
if (Ex.isInvalid())
return ExprError();
QualType Type = Ex.get()->getType();
class DeleteConverter : public ContextualImplicitConverter {
public:
DeleteConverter() : ContextualImplicitConverter(false, true) {}
bool match(QualType ConvType) {
// FIXME: If we have an operator T* and an operator void*, we must pick
// the operator T*.
if (const PointerType *ConvPtrType = ConvType->getAs<PointerType>())
if (ConvPtrType->getPointeeType()->isIncompleteOrObjectType())
return true;
return false;
}
SemaDiagnosticBuilder diagnoseNoMatch(Sema &S, SourceLocation Loc,
QualType T) {
return S.Diag(Loc, diag::err_delete_operand) << T;
}
SemaDiagnosticBuilder diagnoseIncomplete(Sema &S, SourceLocation Loc,
QualType T) {
return S.Diag(Loc, diag::err_delete_incomplete_class_type) << T;
}
SemaDiagnosticBuilder diagnoseExplicitConv(Sema &S, SourceLocation Loc,
QualType T, QualType ConvTy) {
return S.Diag(Loc, diag::err_delete_explicit_conversion) << T << ConvTy;
}
SemaDiagnosticBuilder noteExplicitConv(Sema &S, CXXConversionDecl *Conv,
QualType ConvTy) {
return S.Diag(Conv->getLocation(), diag::note_delete_conversion)
<< ConvTy;
}
SemaDiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc,
QualType T) {
return S.Diag(Loc, diag::err_ambiguous_delete_operand) << T;
}
SemaDiagnosticBuilder noteAmbiguous(Sema &S, CXXConversionDecl *Conv,
QualType ConvTy) {
return S.Diag(Conv->getLocation(), diag::note_delete_conversion)
<< ConvTy;
}
SemaDiagnosticBuilder diagnoseConversion(Sema &S, SourceLocation Loc,
QualType T, QualType ConvTy) {
llvm_unreachable("conversion functions are permitted");
}
} Converter;
Ex = PerformContextualImplicitConversion(StartLoc, Ex.take(), Converter);
if (Ex.isInvalid())
return ExprError();
Type = Ex.get()->getType();
if (!Converter.match(Type))
// FIXME: PerformContextualImplicitConversion should return ExprError
// itself in this case.
return ExprError();
QualType Pointee = Type->getAs<PointerType>()->getPointeeType();
QualType PointeeElem = Context.getBaseElementType(Pointee);
if (unsigned AddressSpace = Pointee.getAddressSpace())
return Diag(Ex.get()->getLocStart(),
diag::err_address_space_qualified_delete)
<< Pointee.getUnqualifiedType() << AddressSpace;
CXXRecordDecl *PointeeRD = 0;
if (Pointee->isVoidType() && !isSFINAEContext()) {
// The C++ standard bans deleting a pointer to a non-object type, which
// effectively bans deletion of "void*". However, most compilers support
// this, so we treat it as a warning unless we're in a SFINAE context.
Diag(StartLoc, diag::ext_delete_void_ptr_operand)
<< Type << Ex.get()->getSourceRange();
} else if (Pointee->isFunctionType() || Pointee->isVoidType()) {
return ExprError(Diag(StartLoc, diag::err_delete_operand)
<< Type << Ex.get()->getSourceRange());
} else if (!Pointee->isDependentType()) {
if (!RequireCompleteType(StartLoc, Pointee,
diag::warn_delete_incomplete, Ex.get())) {
if (const RecordType *RT = PointeeElem->getAs<RecordType>())
PointeeRD = cast<CXXRecordDecl>(RT->getDecl());
}
}
// C++ [expr.delete]p2:
// [Note: a pointer to a const type can be the operand of a
// delete-expression; it is not necessary to cast away the constness
// (5.2.11) of the pointer expression before it is used as the operand
// of the delete-expression. ]
if (Pointee->isArrayType() && !ArrayForm) {
Diag(StartLoc, diag::warn_delete_array_type)
<< Type << Ex.get()->getSourceRange()
<< FixItHint::CreateInsertion(PP.getLocForEndOfToken(StartLoc), "[]");
ArrayForm = true;
}
DeclarationName DeleteName = Context.DeclarationNames.getCXXOperatorName(
ArrayForm ? OO_Array_Delete : OO_Delete);
if (PointeeRD) {
if (!UseGlobal &&
FindDeallocationFunction(StartLoc, PointeeRD, DeleteName,
OperatorDelete))
return ExprError();
// If we're allocating an array of records, check whether the
// usual operator delete[] has a size_t parameter.
if (ArrayForm) {
// If the user specifically asked to use the global allocator,
// we'll need to do the lookup into the class.
if (UseGlobal)
UsualArrayDeleteWantsSize =
doesUsualArrayDeleteWantSize(*this, StartLoc, PointeeElem);
// Otherwise, the usual operator delete[] should be the
// function we just found.
else if (OperatorDelete && isa<CXXMethodDecl>(OperatorDelete))
UsualArrayDeleteWantsSize = (OperatorDelete->getNumParams() == 2);
}
if (!PointeeRD->hasIrrelevantDestructor())
if (CXXDestructorDecl *Dtor = LookupDestructor(PointeeRD)) {
MarkFunctionReferenced(StartLoc,
const_cast<CXXDestructorDecl*>(Dtor));
if (DiagnoseUseOfDecl(Dtor, StartLoc))
return ExprError();
}
// C++ [expr.delete]p3:
// In the first alternative (delete object), if the static type of the
// object to be deleted is different from its dynamic type, the static
// type shall be a base class of the dynamic type of the object to be
// deleted and the static type shall have a virtual destructor or the
// behavior is undefined.
//
// Note: a final class cannot be derived from, so there is no issue there.
if (PointeeRD->isPolymorphic() && !PointeeRD->hasAttr<FinalAttr>()) {
CXXDestructorDecl *dtor = PointeeRD->getDestructor();
if (dtor && !dtor->isVirtual()) {
if (PointeeRD->isAbstract()) {
// If the class is abstract, we warn by default, because we're
// sure the code has undefined behavior.
Diag(StartLoc, diag::warn_delete_abstract_non_virtual_dtor)
<< PointeeElem;
} else if (!ArrayForm) {
// Otherwise, if this is not an array delete, it's a bit suspect,
// but not necessarily wrong.
Diag(StartLoc, diag::warn_delete_non_virtual_dtor) << PointeeElem;
}
}
}
}
if (!OperatorDelete)
// Look for a global declaration.
OperatorDelete = FindUsualDeallocationFunction(
StartLoc, !RequireCompleteType(StartLoc, Pointee, 0) &&
(!ArrayForm || UsualArrayDeleteWantsSize ||
Pointee.isDestructedType()),
DeleteName);
MarkFunctionReferenced(StartLoc, OperatorDelete);
// Check access and ambiguity of operator delete and destructor.
if (PointeeRD) {
if (CXXDestructorDecl *Dtor = LookupDestructor(PointeeRD)) {
CheckDestructorAccess(Ex.get()->getExprLoc(), Dtor,
PDiag(diag::err_access_dtor) << PointeeElem);
}
}
}
return Owned(new (Context) CXXDeleteExpr(Context.VoidTy, UseGlobal, ArrayForm,
ArrayFormAsWritten,
UsualArrayDeleteWantsSize,
OperatorDelete, Ex.take(), StartLoc));
}
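// Assumed example of the polymorphic-delete check above:
//
//   struct Base { virtual void f(); };
//   struct Derived : Base {};
//   Base *p = new Derived;
//   delete p;   // warn_delete_non_virtual_dtor: ~Base() is not virtual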
/// \brief Check the use of the given variable as a C++ condition in an if,
/// while, do-while, or switch statement.
ExprResult Sema::CheckConditionVariable(VarDecl *ConditionVar,
SourceLocation StmtLoc,
bool ConvertToBoolean) {
if (ConditionVar->isInvalidDecl())
return ExprError();
QualType T = ConditionVar->getType();
// C++ [stmt.select]p2:
// The declarator shall not specify a function or an array.
if (T->isFunctionType())
return ExprError(Diag(ConditionVar->getLocation(),
diag::err_invalid_use_of_function_type)
<< ConditionVar->getSourceRange());
else if (T->isArrayType())
return ExprError(Diag(ConditionVar->getLocation(),
diag::err_invalid_use_of_array_type)
<< ConditionVar->getSourceRange());
ExprResult Condition =
Owned(DeclRefExpr::Create(Context, NestedNameSpecifierLoc(),
SourceLocation(),
ConditionVar,
/*enclosing*/ false,
ConditionVar->getLocation(),
ConditionVar->getType().getNonReferenceType(),
VK_LValue));
MarkDeclRefReferenced(cast<DeclRefExpr>(Condition.get()));
if (ConvertToBoolean) {
Condition = CheckBooleanCondition(Condition.take(), StmtLoc);
if (Condition.isInvalid())
return ExprError();
}
return Condition;
}
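// An assumed example of a condition variable handled above ('find' is a
// hypothetical function returning a pointer):
//
//   if (int *p = find(key))   // declared variable, converted to bool
//     use(*p);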
/// CheckCXXBooleanCondition - Returns true if a conversion to bool is invalid.
ExprResult Sema::CheckCXXBooleanCondition(Expr *CondExpr) {
// C++ 6.4p4:
// The value of a condition that is an initialized declaration in a statement
// other than a switch statement is the value of the declared variable
// implicitly converted to type bool. If that conversion is ill-formed, the
// program is ill-formed.
// The value of a condition that is an expression is the value of the
// expression, implicitly converted to bool.
//
return PerformContextuallyConvertToBool(CondExpr);
}
/// Helper function to determine whether this is the (deprecated) C++
/// conversion from a string literal to a pointer to non-const char or
/// non-const wchar_t (for narrow and wide string literals,
/// respectively).
bool
Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) {
// Look inside the implicit cast, if it exists.
if (ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(From))
From = Cast->getSubExpr();
// A string literal (2.13.4) that is not a wide string literal can
// be converted to an rvalue of type "pointer to char"; a wide
// string literal can be converted to an rvalue of type "pointer
// to wchar_t" (C++ 4.2p2).
if (StringLiteral *StrLit = dyn_cast<StringLiteral>(From->IgnoreParens()))
if (const PointerType *ToPtrType = ToType->getAs<PointerType>())
if (const BuiltinType *ToPointeeType
= ToPtrType->getPointeeType()->getAs<BuiltinType>()) {
// This conversion is considered only when there is an
// explicit appropriate pointer target type (C++ 4.2p2).
if (!ToPtrType->getPointeeType().hasQualifiers()) {
switch (StrLit->getKind()) {
case StringLiteral::UTF8:
case StringLiteral::UTF16:
case StringLiteral::UTF32:
// We don't allow UTF literals to be implicitly converted
break;
case StringLiteral::Ascii:
return (ToPointeeType->getKind() == BuiltinType::Char_U ||
ToPointeeType->getKind() == BuiltinType::Char_S);
case StringLiteral::Wide:
return ToPointeeType->isWideCharType();
}
}
}
return false;
}
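// Assumed examples of the deprecated conversions recognized above:
//
//   char *p = "hello";        // deprecated in C++03, ill-formed in C++11
//   wchar_t *w = L"hello";    // likewise for wide string literals
//   // u8/u/U literals never convert this way (handled by the UTF cases).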
static ExprResult BuildCXXCastArgument(Sema &S,
SourceLocation CastLoc,
QualType Ty,
CastKind Kind,
CXXMethodDecl *Method,
DeclAccessPair FoundDecl,
bool HadMultipleCandidates,
Expr *From) {
switch (Kind) {
default: llvm_unreachable("Unhandled cast kind!");
case CK_ConstructorConversion: {
CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(Method);
SmallVector<Expr*, 8> ConstructorArgs;
if (S.RequireNonAbstractType(CastLoc, Ty,
diag::err_allocation_of_abstract_type))
return ExprError();
if (S.CompleteConstructorCall(Constructor, From, CastLoc, ConstructorArgs))
return ExprError();
S.CheckConstructorAccess(CastLoc, Constructor,
InitializedEntity::InitializeTemporary(Ty),
Constructor->getAccess());
ExprResult Result
= S.BuildCXXConstructExpr(CastLoc, Ty, cast<CXXConstructorDecl>(Method),
ConstructorArgs, HadMultipleCandidates,
/*ListInit*/ false, /*ZeroInit*/ false,
CXXConstructExpr::CK_Complete, SourceRange());
if (Result.isInvalid())
return ExprError();
return S.MaybeBindToTemporary(Result.takeAs<Expr>());
}
case CK_UserDefinedConversion: {
assert(!From->getType()->isPointerType() && "Arg can't have pointer type!");
// Create an implicit call expr that calls it.
CXXConversionDecl *Conv = cast<CXXConversionDecl>(Method);
ExprResult Result = S.BuildCXXMemberCallExpr(From, FoundDecl, Conv,
HadMultipleCandidates);
if (Result.isInvalid())
return ExprError();
// Record usage of conversion in an implicit cast.
Result = S.Owned(ImplicitCastExpr::Create(S.Context,
Result.get()->getType(),
CK_UserDefinedConversion,
Result.get(), 0,
Result.get()->getValueKind()));
S.CheckMemberOperatorAccess(CastLoc, From, /*arg*/ 0, FoundDecl);
return S.MaybeBindToTemporary(Result.get());
}
}
}
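// Assumed examples of the two cast kinds handled above:
//
//   struct Source {};
//   struct Target { Target(Source); };   // CK_ConstructorConversion
//   struct Number { operator int(); };   // CK_UserDefinedConversion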
/// PerformImplicitConversion - Perform an implicit conversion of the
/// expression From to the type ToType using the pre-computed implicit
/// conversion sequence ICS. Returns the converted
/// expression. Action is the kind of conversion we're performing,
/// used in the error message.
ExprResult
Sema::PerformImplicitConversion(Expr *From, QualType ToType,
const ImplicitConversionSequence &ICS,
AssignmentAction Action,
CheckedConversionKind CCK) {
switch (ICS.getKind()) {
case ImplicitConversionSequence::StandardConversion: {
ExprResult Res = PerformImplicitConversion(From, ToType, ICS.Standard,
Action, CCK);
if (Res.isInvalid())
return ExprError();
From = Res.take();
break;
}
case ImplicitConversionSequence::UserDefinedConversion: {
FunctionDecl *FD = ICS.UserDefined.ConversionFunction;
CastKind CastKind;
QualType BeforeToType;
assert(FD && "FIXME: aggregate initialization from init list");
if (const CXXConversionDecl *Conv = dyn_cast<CXXConversionDecl>(FD)) {
CastKind = CK_UserDefinedConversion;
// If the user-defined conversion is specified by a conversion function,
// the initial standard conversion sequence converts the source type to
// the implicit object parameter of the conversion function.
BeforeToType = Context.getTagDeclType(Conv->getParent());
} else {
const CXXConstructorDecl *Ctor = cast<CXXConstructorDecl>(FD);
CastKind = CK_ConstructorConversion;
// Do no conversion if dealing with ... for the first conversion.
if (!ICS.UserDefined.EllipsisConversion) {
// If the user-defined conversion is specified by a constructor, the
// initial standard conversion sequence converts the source type to the
// type required by the argument of the constructor
BeforeToType = Ctor->getParamDecl(0)->getType().getNonReferenceType();
}
}
// Watch out for ellipsis conversion.
if (!ICS.UserDefined.EllipsisConversion) {
ExprResult Res =
PerformImplicitConversion(From, BeforeToType,
ICS.UserDefined.Before, AA_Converting,
CCK);
if (Res.isInvalid())
return ExprError();
From = Res.take();
}
ExprResult CastArg
= BuildCXXCastArgument(*this,
From->getLocStart(),
ToType.getNonReferenceType(),
CastKind, cast<CXXMethodDecl>(FD),
ICS.UserDefined.FoundConversionFunction,
ICS.UserDefined.HadMultipleCandidates,
From);
if (CastArg.isInvalid())
return ExprError();
From = CastArg.take();
return PerformImplicitConversion(From, ToType, ICS.UserDefined.After,
AA_Converting, CCK);
}
case ImplicitConversionSequence::AmbiguousConversion:
ICS.DiagnoseAmbiguousConversion(*this, From->getExprLoc(),
PDiag(diag::err_typecheck_ambiguous_condition)
<< From->getSourceRange());
return ExprError();
case ImplicitConversionSequence::EllipsisConversion:
llvm_unreachable("Cannot perform an ellipsis conversion");
case ImplicitConversionSequence::BadConversion:
return ExprError();
}
// Everything went well.
return Owned(From);
}
/// PerformImplicitConversion - Perform an implicit conversion of the
/// expression From to the type ToType by following the standard
/// conversion sequence SCS. Returns the converted
/// expression. Flavor is the context in which we're performing this
/// conversion, for use in error messages.
ExprResult
Sema::PerformImplicitConversion(Expr *From, QualType ToType,
const StandardConversionSequence& SCS,
AssignmentAction Action,
CheckedConversionKind CCK) {
bool CStyle = (CCK == CCK_CStyleCast || CCK == CCK_FunctionalCast);
// Overall FIXME: we are recomputing too many types here and doing far too
// much extra work. We need to keep track of more of the information that
// is computed when we initially try the implicit conversion, so that we
// don't need to recompute anything here.
QualType FromType = From->getType();
if (SCS.CopyConstructor) {
// FIXME: When can ToType be a reference type?
assert(!ToType->isReferenceType());
if (SCS.Second == ICK_Derived_To_Base) {
SmallVector<Expr*, 8> ConstructorArgs;
if (CompleteConstructorCall(cast<CXXConstructorDecl>(SCS.CopyConstructor),
From, /*FIXME:ConstructLoc*/SourceLocation(),
ConstructorArgs))
return ExprError();
return BuildCXXConstructExpr(/*FIXME:ConstructLoc*/SourceLocation(),
ToType, SCS.CopyConstructor,
ConstructorArgs,
/*HadMultipleCandidates*/ false,
/*ListInit*/ false, /*ZeroInit*/ false,
CXXConstructExpr::CK_Complete,
SourceRange());
}
return BuildCXXConstructExpr(/*FIXME:ConstructLoc*/SourceLocation(),
ToType, SCS.CopyConstructor,
From, /*HadMultipleCandidates*/ false,
/*ListInit*/ false, /*ZeroInit*/ false,
CXXConstructExpr::CK_Complete,
SourceRange());
}
// Resolve overloaded function references.
if (Context.hasSameType(FromType, Context.OverloadTy)) {
DeclAccessPair Found;
FunctionDecl *Fn = ResolveAddressOfOverloadedFunction(From, ToType,
true, Found);
if (!Fn)
return ExprError();
if (DiagnoseUseOfDecl(Fn, From->getLocStart()))
return ExprError();
From = FixOverloadedFunctionReference(From, Found, Fn);
FromType = From->getType();
}
// If we're converting to an atomic type, first convert to the corresponding
// non-atomic type.
QualType ToAtomicType;
if (const AtomicType *ToAtomic = ToType->getAs<AtomicType>()) {
ToAtomicType = ToType;
ToType = ToAtomic->getValueType();
}
// Perform the first implicit conversion.
switch (SCS.First) {
case ICK_Identity:
// Nothing to do.
break;
case ICK_Lvalue_To_Rvalue: {
assert(From->getObjectKind() != OK_ObjCProperty);
FromType = FromType.getUnqualifiedType();
ExprResult FromRes = DefaultLvalueConversion(From);
assert(!FromRes.isInvalid() && "Can't perform deduced conversion?!");
From = FromRes.take();
break;
}
case ICK_Array_To_Pointer:
FromType = Context.getArrayDecayedType(FromType);
From = ImpCastExprToType(From, FromType, CK_ArrayToPointerDecay,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Function_To_Pointer:
FromType = Context.getPointerType(FromType);
From = ImpCastExprToType(From, FromType, CK_FunctionToPointerDecay,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
default:
llvm_unreachable("Improper first standard conversion");
}
// Perform the second implicit conversion
switch (SCS.Second) {
case ICK_Identity:
// If both sides are functions (or pointers/references to them), there could
// be incompatible exception declarations.
if (CheckExceptionSpecCompatibility(From, ToType))
return ExprError();
// Nothing else to do.
break;
case ICK_NoReturn_Adjustment:
// If both sides are functions (or pointers/references to them), there could
// be incompatible exception declarations.
if (CheckExceptionSpecCompatibility(From, ToType))
return ExprError();
From = ImpCastExprToType(From, ToType, CK_NoOp,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Integral_Promotion:
case ICK_Integral_Conversion:
if (ToType->isBooleanType()) {
assert(FromType->castAs<EnumType>()->getDecl()->isFixed() &&
SCS.Second == ICK_Integral_Promotion &&
"only enums with fixed underlying type can promote to bool");
From = ImpCastExprToType(From, ToType, CK_IntegralToBoolean,
VK_RValue, /*BasePath=*/0, CCK).take();
} else {
From = ImpCastExprToType(From, ToType, CK_IntegralCast,
VK_RValue, /*BasePath=*/0, CCK).take();
}
break;
case ICK_Floating_Promotion:
case ICK_Floating_Conversion:
From = ImpCastExprToType(From, ToType, CK_FloatingCast,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Complex_Promotion:
case ICK_Complex_Conversion: {
QualType FromEl = From->getType()->getAs<ComplexType>()->getElementType();
QualType ToEl = ToType->getAs<ComplexType>()->getElementType();
CastKind CK;
if (FromEl->isRealFloatingType()) {
if (ToEl->isRealFloatingType())
CK = CK_FloatingComplexCast;
else
CK = CK_FloatingComplexToIntegralComplex;
} else if (ToEl->isRealFloatingType()) {
CK = CK_IntegralComplexToFloatingComplex;
} else {
CK = CK_IntegralComplexCast;
}
From = ImpCastExprToType(From, ToType, CK,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
}
case ICK_Floating_Integral:
if (ToType->isRealFloatingType())
From = ImpCastExprToType(From, ToType, CK_IntegralToFloating,
VK_RValue, /*BasePath=*/0, CCK).take();
else
From = ImpCastExprToType(From, ToType, CK_FloatingToIntegral,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Compatible_Conversion:
From = ImpCastExprToType(From, ToType, CK_NoOp,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Writeback_Conversion:
case ICK_Pointer_Conversion: {
if (SCS.IncompatibleObjC && Action != AA_Casting) {
// Diagnose incompatible Objective-C conversions
if (Action == AA_Initializing || Action == AA_Assigning)
Diag(From->getLocStart(),
diag::ext_typecheck_convert_incompatible_pointer)
<< ToType << From->getType() << Action
<< From->getSourceRange() << 0;
else
Diag(From->getLocStart(),
diag::ext_typecheck_convert_incompatible_pointer)
<< From->getType() << ToType << Action
<< From->getSourceRange() << 0;
if (From->getType()->isObjCObjectPointerType() &&
ToType->isObjCObjectPointerType())
EmitRelatedResultTypeNote(From);
}
else if (getLangOpts().ObjCAutoRefCount &&
!CheckObjCARCUnavailableWeakConversion(ToType,
From->getType())) {
if (Action == AA_Initializing)
Diag(From->getLocStart(),
diag::err_arc_weak_unavailable_assign);
else
Diag(From->getLocStart(),
diag::err_arc_convesion_of_weak_unavailable)
<< (Action == AA_Casting) << From->getType() << ToType
<< From->getSourceRange();
}
CastKind Kind = CK_Invalid;
CXXCastPath BasePath;
if (CheckPointerConversion(From, ToType, Kind, BasePath, CStyle))
return ExprError();
// Make sure we extend blocks if necessary.
// FIXME: doing this here is really ugly.
if (Kind == CK_BlockPointerToObjCPointerCast) {
ExprResult E = From;
(void) PrepareCastToObjCObjectPointer(E);
From = E.take();
}
if (getLangOpts().ObjCAutoRefCount)
CheckObjCARCConversion(SourceRange(), ToType, From, CCK);
From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK)
.take();
break;
}
case ICK_Pointer_Member: {
CastKind Kind = CK_Invalid;
CXXCastPath BasePath;
if (CheckMemberPointerConversion(From, ToType, Kind, BasePath, CStyle))
return ExprError();
if (CheckExceptionSpecCompatibility(From, ToType))
return ExprError();
From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK)
.take();
break;
}
case ICK_Boolean_Conversion:
// Perform half-to-boolean conversion via float.
if (From->getType()->isHalfType()) {
From = ImpCastExprToType(From, Context.FloatTy, CK_FloatingCast).take();
FromType = Context.FloatTy;
}
From = ImpCastExprToType(From, Context.BoolTy,
ScalarTypeToBooleanCastKind(FromType),
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Derived_To_Base: {
CXXCastPath BasePath;
if (CheckDerivedToBaseConversion(From->getType(),
ToType.getNonReferenceType(),
From->getLocStart(),
From->getSourceRange(),
&BasePath,
CStyle))
return ExprError();
From = ImpCastExprToType(From, ToType.getNonReferenceType(),
CK_DerivedToBase, From->getValueKind(),
&BasePath, CCK).take();
break;
}
case ICK_Vector_Conversion:
From = ImpCastExprToType(From, ToType, CK_BitCast,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Vector_Splat:
From = ImpCastExprToType(From, ToType, CK_VectorSplat,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
case ICK_Complex_Real:
// Case 1. x -> _Complex y
if (const ComplexType *ToComplex = ToType->getAs<ComplexType>()) {
QualType ElType = ToComplex->getElementType();
bool isFloatingComplex = ElType->isRealFloatingType();
// x -> y
if (Context.hasSameUnqualifiedType(ElType, From->getType())) {
// do nothing
} else if (From->getType()->isRealFloatingType()) {
From = ImpCastExprToType(From, ElType,
isFloatingComplex ? CK_FloatingCast : CK_FloatingToIntegral).take();
} else {
assert(From->getType()->isIntegerType());
From = ImpCastExprToType(From, ElType,
isFloatingComplex ? CK_IntegralToFloating : CK_IntegralCast).take();
}
// y -> _Complex y
From = ImpCastExprToType(From, ToType,
isFloatingComplex ? CK_FloatingRealToComplex
: CK_IntegralRealToComplex).take();
// Case 2. _Complex x -> y
} else {
const ComplexType *FromComplex = From->getType()->getAs<ComplexType>();
assert(FromComplex);
QualType ElType = FromComplex->getElementType();
bool isFloatingComplex = ElType->isRealFloatingType();
// _Complex x -> x
From = ImpCastExprToType(From, ElType,
isFloatingComplex ? CK_FloatingComplexToReal
: CK_IntegralComplexToReal,
VK_RValue, /*BasePath=*/0, CCK).take();
// x -> y
if (Context.hasSameUnqualifiedType(ElType, ToType)) {
// do nothing
} else if (ToType->isRealFloatingType()) {
From = ImpCastExprToType(From, ToType,
isFloatingComplex ? CK_FloatingCast : CK_IntegralToFloating,
VK_RValue, /*BasePath=*/0, CCK).take();
} else {
assert(ToType->isIntegerType());
From = ImpCastExprToType(From, ToType,
isFloatingComplex ? CK_FloatingToIntegral : CK_IntegralCast,
VK_RValue, /*BasePath=*/0, CCK).take();
}
}
break;
case ICK_Block_Pointer_Conversion: {
From = ImpCastExprToType(From, ToType.getUnqualifiedType(), CK_BitCast,
VK_RValue, /*BasePath=*/0, CCK).take();
break;
}
case ICK_TransparentUnionConversion: {
ExprResult FromRes = Owned(From);
Sema::AssignConvertType ConvTy =
CheckTransparentUnionArgumentConstraints(ToType, FromRes);
if (FromRes.isInvalid())
return ExprError();
From = FromRes.take();
assert((ConvTy == Sema::Compatible) &&
"Improper transparent union conversion");
(void)ConvTy;
break;
}
case ICK_Zero_Event_Conversion:
From = ImpCastExprToType(From, ToType,
CK_ZeroToOCLEvent,
From->getValueKind()).take();
break;
case ICK_Lvalue_To_Rvalue:
case ICK_Array_To_Pointer:
case ICK_Function_To_Pointer:
case ICK_Qualification:
case ICK_Num_Conversion_Kinds:
llvm_unreachable("Improper second standard conversion");
}
switch (SCS.Third) {
case ICK_Identity:
// Nothing to do.
break;
case ICK_Qualification: {
// The qualification keeps the category of the inner expression, unless the
// target type isn't a reference.
ExprValueKind VK = ToType->isReferenceType() ?
From->getValueKind() : VK_RValue;
From = ImpCastExprToType(From, ToType.getNonLValueExprType(Context),
CK_NoOp, VK, /*BasePath=*/0, CCK).take();
if (SCS.DeprecatedStringLiteralToCharPtr &&
!getLangOpts().WritableStrings)
Diag(From->getLocStart(), diag::warn_deprecated_string_literal_conversion)
<< ToType.getNonReferenceType();
break;
}
default:
llvm_unreachable("Improper third standard conversion");
}
// If this conversion sequence involved a scalar -> atomic conversion, perform
// that conversion now.
if (!ToAtomicType.isNull()) {
assert(Context.hasSameType(
ToAtomicType->castAs<AtomicType>()->getValueType(), From->getType()));
From = ImpCastExprToType(From, ToAtomicType, CK_NonAtomicToAtomic,
VK_RValue, 0, CCK).take();
}
return Owned(From);
}
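// For example, a simple standard conversion sequence such as:
//
//   const short s = 1;
//   int i = s;
//
// runs through the steps above as ICK_Lvalue_To_Rvalue (First),
// ICK_Integral_Promotion (Second) and ICK_Identity (Third).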
ExprResult Sema::ActOnUnaryTypeTrait(UnaryTypeTrait UTT,
SourceLocation KWLoc,
ParsedType Ty,
SourceLocation RParen) {
TypeSourceInfo *TSInfo;
QualType T = GetTypeFromParser(Ty, &TSInfo);
if (!TSInfo)
TSInfo = Context.getTrivialTypeSourceInfo(T);
return BuildUnaryTypeTrait(UTT, KWLoc, TSInfo, RParen);
}
/// \brief Check the completeness of a type in a unary type trait.
///
/// If the particular type trait requires a complete type, tries to complete
/// it. If completing the type fails, a diagnostic is emitted and false
/// returned. If completing the type succeeds or no completion was required,
/// returns true.
static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S,
UnaryTypeTrait UTT,
SourceLocation Loc,
QualType ArgTy) {
// C++0x [meta.unary.prop]p3:
// For all of the class templates X declared in this Clause, instantiating
// that template with a template argument that is a class template
// specialization may result in the implicit instantiation of the template
// argument if and only if the semantics of X require that the argument
// must be a complete type.
// We apply this rule to all the type trait expressions used to implement
// these class templates. We also try to follow any GCC documented behavior
// in these expressions to ensure portability of standard libraries.
switch (UTT) {
// is_complete_type somewhat obviously cannot require a complete type.
case UTT_IsCompleteType:
// Fall-through
// These traits are modeled on the type predicates in C++0x
// [meta.unary.cat] and [meta.unary.comp]. They are not specified as
// requiring a complete type, as whether or not they return true cannot be
// impacted by the completeness of the type.
case UTT_IsVoid:
case UTT_IsIntegral:
case UTT_IsFloatingPoint:
case UTT_IsArray:
case UTT_IsPointer:
case UTT_IsLvalueReference:
case UTT_IsRvalueReference:
case UTT_IsMemberFunctionPointer:
case UTT_IsMemberObjectPointer:
case UTT_IsEnum:
case UTT_IsUnion:
case UTT_IsClass:
case UTT_IsFunction:
case UTT_IsReference:
case UTT_IsArithmetic:
case UTT_IsFundamental:
case UTT_IsObject:
case UTT_IsScalar:
case UTT_IsCompound:
case UTT_IsMemberPointer:
// Fall-through
// These traits are modeled on type predicates in C++0x [meta.unary.prop]
// which requires some of its traits to be applied to a complete type. However,
// the completeness of the type cannot impact these traits' semantics, and
// so they don't require it. This matches the comments on these traits in
// Table 49.
case UTT_IsConst:
case UTT_IsVolatile:
case UTT_IsSigned:
case UTT_IsUnsigned:
return true;
// C++0x [meta.unary.prop] Table 49 requires the following traits to be
// applied to a complete type.
case UTT_IsTrivial:
case UTT_IsTriviallyCopyable:
case UTT_IsStandardLayout:
case UTT_IsPOD:
case UTT_IsLiteral:
case UTT_IsEmpty:
case UTT_IsPolymorphic:
case UTT_IsAbstract:
case UTT_IsInterfaceClass:
// Fall-through
// These traits require a complete type.
case UTT_IsFinal:
case UTT_IsSealed:
// These trait expressions are designed to help implement predicates in
// [meta.unary.prop] despite not being named the same. They are specified
// by both GCC and the Embarcadero C++ compiler, and require a complete
// type because the overarching C++0x type predicates they help implement
// require one.
case UTT_HasNothrowAssign:
case UTT_HasNothrowMoveAssign:
case UTT_HasNothrowConstructor:
case UTT_HasNothrowCopy:
case UTT_HasTrivialAssign:
case UTT_HasTrivialMoveAssign:
case UTT_HasTrivialDefaultConstructor:
case UTT_HasTrivialMoveConstructor:
case UTT_HasTrivialCopy:
case UTT_HasTrivialDestructor:
case UTT_HasVirtualDestructor:
// Arrays of unknown bound are expressly allowed.
QualType ElTy = ArgTy;
if (ArgTy->isIncompleteArrayType())
ElTy = S.Context.getAsArrayType(ArgTy)->getElementType();
// The void type is expressly allowed.
if (ElTy->isVoidType())
return true;
return !S.RequireCompleteType(
Loc, ElTy, diag::err_incomplete_type_used_in_type_trait_expr);
}
llvm_unreachable("Type trait not handled by switch");
}
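// For example, given a hypothetical incomplete type:
//
//   struct Incomplete;
//   __is_pointer(Incomplete *)        // OK: completeness is irrelevant
//   __is_polymorphic(Incomplete)      // error: requires a complete type
//   __has_trivial_destructor(int[])   // OK: array of unknown bound with
//                                     // a complete element type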
static bool HasNoThrowOperator(const RecordType *RT, OverloadedOperatorKind Op,
Sema &Self, SourceLocation KeyLoc, ASTContext &C,
bool (CXXRecordDecl::*HasTrivial)() const,
bool (CXXRecordDecl::*HasNonTrivial)() const,
bool (CXXMethodDecl::*IsDesiredOp)() const)
{
CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
if ((RD->*HasTrivial)() && !(RD->*HasNonTrivial)())
return true;
DeclarationName Name = C.DeclarationNames.getCXXOperatorName(Op);
DeclarationNameInfo NameInfo(Name, KeyLoc);
LookupResult Res(Self, NameInfo, Sema::LookupOrdinaryName);
if (Self.LookupQualifiedName(Res, RD)) {
bool FoundOperator = false;
Res.suppressDiagnostics();
for (LookupResult::iterator Op = Res.begin(), OpEnd = Res.end();
Op != OpEnd; ++Op) {
if (isa<FunctionTemplateDecl>(*Op))
continue;
CXXMethodDecl *Operator = cast<CXXMethodDecl>(*Op);
if ((Operator->*IsDesiredOp)()) {
FoundOperator = true;
const FunctionProtoType *CPT =
Operator->getType()->getAs<FunctionProtoType>();
CPT = Self.ResolveExceptionSpec(KeyLoc, CPT);
if (!CPT || !CPT->isNothrow(Self.Context))
return false;
}
}
return FoundOperator;
}
return false;
}
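// For example, the helper above makes __has_nothrow_assign true for a
// hypothetical class with a non-trivial but non-throwing copy assignment:
//
//   struct S { S &operator=(const S &) noexcept; };
//   __has_nothrow_assign(S)   // true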
static bool EvaluateUnaryTypeTrait(Sema &Self, UnaryTypeTrait UTT,
SourceLocation KeyLoc, QualType T) {
assert(!T->isDependentType() && "Cannot evaluate traits of dependent type");
ASTContext &C = Self.Context;
switch(UTT) {
// Type trait expressions corresponding to the primary type category
// predicates in C++0x [meta.unary.cat].
case UTT_IsVoid:
return T->isVoidType();
case UTT_IsIntegral:
return T->isIntegralType(C);
case UTT_IsFloatingPoint:
return T->isFloatingType();
case UTT_IsArray:
return T->isArrayType();
case UTT_IsPointer:
return T->isPointerType();
case UTT_IsLvalueReference:
return T->isLValueReferenceType();
case UTT_IsRvalueReference:
return T->isRValueReferenceType();
case UTT_IsMemberFunctionPointer:
return T->isMemberFunctionPointerType();
case UTT_IsMemberObjectPointer:
return T->isMemberDataPointerType();
case UTT_IsEnum:
return T->isEnumeralType();
case UTT_IsUnion:
return T->isUnionType();
case UTT_IsClass:
return T->isClassType() || T->isStructureType() || T->isInterfaceType();
case UTT_IsFunction:
return T->isFunctionType();
// Type trait expressions which correspond to the convenient composition
// predicates in C++0x [meta.unary.comp].
case UTT_IsReference:
return T->isReferenceType();
case UTT_IsArithmetic:
return T->isArithmeticType() && !T->isEnumeralType();
case UTT_IsFundamental:
return T->isFundamentalType();
case UTT_IsObject:
return T->isObjectType();
case UTT_IsScalar:
// Note: semantic analysis depends on Objective-C lifetime types being
// considered scalar types. However, such types do not actually behave
// like scalar types at run time (since they may require retain/release
// operations), so we report them as non-scalar.
if (T->isObjCLifetimeType()) {
switch (T.getObjCLifetime()) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
return true;
case Qualifiers::OCL_Strong:
case Qualifiers::OCL_Weak:
case Qualifiers::OCL_Autoreleasing:
return false;
}
}
return T->isScalarType();
case UTT_IsCompound:
return T->isCompoundType();
case UTT_IsMemberPointer:
return T->isMemberPointerType();
// Type trait expressions which correspond to the type property predicates
// in C++0x [meta.unary.prop].
case UTT_IsConst:
return T.isConstQualified();
case UTT_IsVolatile:
return T.isVolatileQualified();
case UTT_IsTrivial:
return T.isTrivialType(Self.Context);
case UTT_IsTriviallyCopyable:
return T.isTriviallyCopyableType(Self.Context);
case UTT_IsStandardLayout:
return T->isStandardLayoutType();
case UTT_IsPOD:
return T.isPODType(Self.Context);
case UTT_IsLiteral:
return T->isLiteralType(Self.Context);
case UTT_IsEmpty:
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return !RD->isUnion() && RD->isEmpty();
return false;
case UTT_IsPolymorphic:
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return RD->isPolymorphic();
return false;
case UTT_IsAbstract:
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return RD->isAbstract();
return false;
case UTT_IsInterfaceClass:
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return RD->isInterface();
return false;
case UTT_IsFinal:
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return RD->hasAttr<FinalAttr>();
return false;
case UTT_IsSealed:
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
if (FinalAttr *FA = RD->getAttr<FinalAttr>())
return FA->isSpelledAsSealed();
return false;
case UTT_IsSigned:
return T->isSignedIntegerType();
case UTT_IsUnsigned:
return T->isUnsignedIntegerType();
// Type trait expressions which query classes regarding their construction,
// destruction, and copying. Rather than being based directly on the
// related type predicates in the standard, they are specified by both
// GCC[1] and the Embarcadero C++ compiler[2], and Clang implements those
// specifications.
//
// 1: http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html
// 2: http://docwiki.embarcadero.com/RADStudio/XE/en/Type_Trait_Functions_(C%2B%2B0x)_Index
//
// Note that these builtins do not behave as documented in g++: if a class
// has both a trivial and a non-trivial special member of a particular kind,
// they return false! For now, we emulate this behavior.
// FIXME: This appears to be a g++ bug: more complex cases reveal that it
// does not correctly compute triviality in the presence of multiple special
// members of the same kind. Revisit this once the g++ bug is fixed.
case UTT_HasTrivialDefaultConstructor:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If __is_pod (type) is true then the trait is true, else if type is
// a cv class or union type (or array thereof) with a trivial default
// constructor ([class.ctor]) then the trait is true, else it is false.
if (T.isPODType(Self.Context))
return true;
if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl())
return RD->hasTrivialDefaultConstructor() &&
!RD->hasNonTrivialDefaultConstructor();
return false;
case UTT_HasTrivialMoveConstructor:
// This trait is implemented by MSVC 2012 and is needed to parse the
// standard library headers. Specifically this is used as the logic
// behind std::is_trivially_move_constructible (20.9.4.3).
if (T.isPODType(Self.Context))
return true;
if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl())
return RD->hasTrivialMoveConstructor() && !RD->hasNonTrivialMoveConstructor();
return false;
case UTT_HasTrivialCopy:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If __is_pod (type) is true or type is a reference type then
// the trait is true, else if type is a cv class or union type
// with a trivial copy constructor ([class.copy]) then the trait
// is true, else it is false.
if (T.isPODType(Self.Context) || T->isReferenceType())
return true;
if (CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return RD->hasTrivialCopyConstructor() &&
!RD->hasNonTrivialCopyConstructor();
return false;
case UTT_HasTrivialMoveAssign:
// This trait is implemented by MSVC 2012 and is needed to parse the
// standard library headers. Specifically it is used as the logic
// behind std::is_trivially_move_assignable (20.9.4.3)
if (T.isPODType(Self.Context))
return true;
if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl())
return RD->hasTrivialMoveAssignment() && !RD->hasNonTrivialMoveAssignment();
return false;
case UTT_HasTrivialAssign:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If type is const qualified or is a reference type then the
// trait is false. Otherwise if __is_pod (type) is true then the
// trait is true, else if type is a cv class or union type with
// a trivial copy assignment ([class.copy]) then the trait is
// true, else it is false.
// Note: the const and reference restrictions are interesting,
// given that const and reference members don't prevent a class
// from having a trivial copy assignment operator (but do cause
// errors if the copy assignment operator is actually used, q.v.
// [class.copy]p12).
if (T.isConstQualified())
return false;
if (T.isPODType(Self.Context))
return true;
if (CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return RD->hasTrivialCopyAssignment() &&
!RD->hasNonTrivialCopyAssignment();
return false;
case UTT_HasTrivialDestructor:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If __is_pod (type) is true or type is a reference type
// then the trait is true, else if type is a cv class or union
// type (or array thereof) with a trivial destructor
// ([class.dtor]) then the trait is true, else it is
// false.
if (T.isPODType(Self.Context) || T->isReferenceType())
return true;
// Objective-C++ ARC: autorelease types don't require destruction.
if (T->isObjCLifetimeType() &&
T.getObjCLifetime() == Qualifiers::OCL_Autoreleasing)
return true;
if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl())
return RD->hasTrivialDestructor();
return false;
// TODO: Propagate nothrowness for implicitly declared special members.
case UTT_HasNothrowAssign:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If type is const qualified or is a reference type then the
// trait is false. Otherwise if __has_trivial_assign (type)
// is true then the trait is true, else if type is a cv class
// or union type with copy assignment operators that are known
// not to throw an exception then the trait is true, else it is
// false.
if (C.getBaseElementType(T).isConstQualified())
return false;
if (T->isReferenceType())
return false;
if (T.isPODType(Self.Context) || T->isObjCLifetimeType())
return true;
if (const RecordType *RT = T->getAs<RecordType>())
return HasNoThrowOperator(RT, OO_Equal, Self, KeyLoc, C,
&CXXRecordDecl::hasTrivialCopyAssignment,
&CXXRecordDecl::hasNonTrivialCopyAssignment,
&CXXMethodDecl::isCopyAssignmentOperator);
return false;
case UTT_HasNothrowMoveAssign:
// This trait is implemented by MSVC 2012 and is needed to parse the
// standard library headers. Specifically this is used as the logic
// behind std::is_nothrow_move_assignable (20.9.4.3).
if (T.isPODType(Self.Context))
return true;
if (const RecordType *RT = C.getBaseElementType(T)->getAs<RecordType>())
return HasNoThrowOperator(RT, OO_Equal, Self, KeyLoc, C,
&CXXRecordDecl::hasTrivialMoveAssignment,
&CXXRecordDecl::hasNonTrivialMoveAssignment,
&CXXMethodDecl::isMoveAssignmentOperator);
return false;
case UTT_HasNothrowCopy:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If __has_trivial_copy (type) is true then the trait is true, else
// if type is a cv class or union type with copy constructors that are
// known not to throw an exception then the trait is true, else it is
// false.
if (T.isPODType(C) || T->isReferenceType() || T->isObjCLifetimeType())
return true;
if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) {
if (RD->hasTrivialCopyConstructor() &&
!RD->hasNonTrivialCopyConstructor())
return true;
bool FoundConstructor = false;
unsigned FoundTQs;
DeclContext::lookup_const_result R = Self.LookupConstructors(RD);
for (DeclContext::lookup_const_iterator Con = R.begin(),
ConEnd = R.end(); Con != ConEnd; ++Con) {
// A template constructor is never a copy constructor.
// FIXME: However, it may still be selected at the actual overload
// resolution point.
if (isa<FunctionTemplateDecl>(*Con))
continue;
CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(*Con);
if (Constructor->isCopyConstructor(FoundTQs)) {
FoundConstructor = true;
const FunctionProtoType *CPT
= Constructor->getType()->getAs<FunctionProtoType>();
CPT = Self.ResolveExceptionSpec(KeyLoc, CPT);
if (!CPT)
return false;
// FIXME: check whether evaluating default arguments can throw.
// For now, we'll be conservative and assume that they can throw.
if (!CPT->isNothrow(Self.Context) || CPT->getNumArgs() > 1)
return false;
}
}
return FoundConstructor;
}
return false;
case UTT_HasNothrowConstructor:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If __has_trivial_constructor (type) is true then the trait is
// true, else if type is a cv class or union type (or array
// thereof) with a default constructor that is known not to
// throw an exception then the trait is true, else it is false.
if (T.isPODType(C) || T->isObjCLifetimeType())
return true;
if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) {
if (RD->hasTrivialDefaultConstructor() &&
!RD->hasNonTrivialDefaultConstructor())
return true;
DeclContext::lookup_const_result R = Self.LookupConstructors(RD);
for (DeclContext::lookup_const_iterator Con = R.begin(),
ConEnd = R.end(); Con != ConEnd; ++Con) {
// FIXME: In C++0x, a constructor template can be a default constructor.
if (isa<FunctionTemplateDecl>(*Con))
continue;
CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(*Con);
if (Constructor->isDefaultConstructor()) {
const FunctionProtoType *CPT
= Constructor->getType()->getAs<FunctionProtoType>();
CPT = Self.ResolveExceptionSpec(KeyLoc, CPT);
if (!CPT)
return false;
// TODO: check whether evaluating default arguments can throw.
// For now, we'll be conservative and assume that they can throw.
return CPT->isNothrow(Self.Context) && CPT->getNumArgs() == 0;
}
}
}
return false;
case UTT_HasVirtualDestructor:
// http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html:
// If type is a class type with a virtual destructor ([class.dtor])
// then the trait is true, else it is false.
if (CXXRecordDecl *RD = T->getAsCXXRecordDecl())
if (CXXDestructorDecl *Destructor = Self.LookupDestructor(RD))
return Destructor->isVirtual();
return false;
// These type trait expressions are modeled on the specifications for the
// Embarcadero C++0x type trait functions:
// http://docwiki.embarcadero.com/RADStudio/XE/en/Type_Trait_Functions_(C%2B%2B0x)_Index
case UTT_IsCompleteType:
// http://docwiki.embarcadero.com/RADStudio/XE/en/Is_complete_type_(typename_T_):
// Returns True if and only if T is a complete type at the point of the
// function call.
return !T->isIncompleteType();
}
llvm_unreachable("Type trait not covered by switch");
}
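// For example, the g++ quirk emulated above means a hypothetical class
// with both a trivial and a non-trivial copy constructor reports false:
//
//   struct S {
//     S(const S &) = default;   // trivial copy constructor
//     S(S &);                   // user-provided copy constructor
//   };
//   __has_trivial_copy(S)       // false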
ExprResult Sema::BuildUnaryTypeTrait(UnaryTypeTrait UTT,
SourceLocation KWLoc,
TypeSourceInfo *TSInfo,
SourceLocation RParen) {
QualType T = TSInfo->getType();
if (!CheckUnaryTypeTraitTypeCompleteness(*this, UTT, KWLoc, T))
return ExprError();
bool Value = false;
if (!T->isDependentType())
Value = EvaluateUnaryTypeTrait(*this, UTT, KWLoc, T);
return Owned(new (Context) UnaryTypeTraitExpr(KWLoc, UTT, TSInfo, Value,
RParen, Context.BoolTy));
}
ExprResult Sema::ActOnBinaryTypeTrait(BinaryTypeTrait BTT,
SourceLocation KWLoc,
ParsedType LhsTy,
ParsedType RhsTy,
SourceLocation RParen) {
TypeSourceInfo *LhsTSInfo;
QualType LhsT = GetTypeFromParser(LhsTy, &LhsTSInfo);
if (!LhsTSInfo)
LhsTSInfo = Context.getTrivialTypeSourceInfo(LhsT);
TypeSourceInfo *RhsTSInfo;
QualType RhsT = GetTypeFromParser(RhsTy, &RhsTSInfo);
if (!RhsTSInfo)
RhsTSInfo = Context.getTrivialTypeSourceInfo(RhsT);
return BuildBinaryTypeTrait(BTT, KWLoc, LhsTSInfo, RhsTSInfo, RParen);
}
/// \brief Determine whether T has a non-trivial Objective-C lifetime in
/// ARC mode.
static bool hasNontrivialObjCLifetime(QualType T) {
switch (T.getObjCLifetime()) {
case Qualifiers::OCL_ExplicitNone:
return false;
case Qualifiers::OCL_Strong:
case Qualifiers::OCL_Weak:
case Qualifiers::OCL_Autoreleasing:
return true;
case Qualifiers::OCL_None:
return T->isObjCLifetimeType();
}
llvm_unreachable("Unknown ObjC lifetime qualifier");
}
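// For example, under ARC this returns true for '__strong id' and
// '__weak id' but false for '__unsafe_unretained id' (explicitly
// trivial) and for non-retainable types like 'int'.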
static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc,
ArrayRef<TypeSourceInfo *> Args,
SourceLocation RParenLoc) {
switch (Kind) {
case clang::TT_IsTriviallyConstructible: {
// C++11 [meta.unary.prop]:
// is_trivially_constructible is defined as:
//
// is_constructible<T, Args...>::value is true and the variable
// definition for is_constructible, as defined below, is known to call
// no operation that is not trivial.
//
// The predicate condition for a template specialization
// is_constructible<T, Args...> shall be satisfied if and only if the
// following variable definition would be well-formed for some invented
// variable t:
//
// T t(create<Args>()...);
if (Args.empty()) {
S.Diag(KWLoc, diag::err_type_trait_arity)
<< 1 << 1 << 1 << (int)Args.size();
return false;
}
// Precondition: T and all types in the parameter pack Args shall be
// complete types, (possibly cv-qualified) void, or arrays of
// unknown bound.
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
QualType ArgTy = Args[I]->getType();
if (ArgTy->isVoidType() || ArgTy->isIncompleteArrayType())
continue;
if (S.RequireCompleteType(KWLoc, ArgTy,
diag::err_incomplete_type_used_in_type_trait_expr))
return false;
}
// Make sure the first argument is a complete type.
if (Args[0]->getType()->isIncompleteType())
return false;
SmallVector<OpaqueValueExpr, 2> OpaqueArgExprs;
SmallVector<Expr *, 2> ArgExprs;
ArgExprs.reserve(Args.size() - 1);
for (unsigned I = 1, N = Args.size(); I != N; ++I) {
QualType T = Args[I]->getType();
if (T->isObjectType() || T->isFunctionType())
T = S.Context.getRValueReferenceType(T);
OpaqueArgExprs.push_back(
OpaqueValueExpr(Args[I]->getTypeLoc().getLocStart(),
T.getNonLValueExprType(S.Context),
Expr::getValueKindForType(T)));
ArgExprs.push_back(&OpaqueArgExprs.back());
}
// Perform the initialization in an unevaluated context within a SFINAE
// trap at translation unit scope.
EnterExpressionEvaluationContext Unevaluated(S, Sema::Unevaluated);
Sema::SFINAETrap SFINAE(S, /*AccessCheckingSFINAE=*/true);
Sema::ContextRAII TUContext(S, S.Context.getTranslationUnitDecl());
InitializedEntity To(InitializedEntity::InitializeTemporary(Args[0]));
InitializationKind InitKind(InitializationKind::CreateDirect(KWLoc, KWLoc,
RParenLoc));
InitializationSequence Init(S, To, InitKind, ArgExprs);
if (Init.Failed())
return false;
ExprResult Result = Init.Perform(S, To, InitKind, ArgExprs);
if (Result.isInvalid() || SFINAE.hasErrorOccurred())
return false;
// Under Objective-C ARC, if the destination has non-trivial Objective-C
// lifetime, this is a non-trivial construction.
if (S.getLangOpts().ObjCAutoRefCount &&
hasNontrivialObjCLifetime(Args[0]->getType().getNonReferenceType()))
return false;
// The initialization succeeded; now make sure there are no non-trivial
// calls.
return !Result.get()->hasNonTrivialCall(S.Context);
}
}
return false;
}
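// For example, with a hypothetical trivial aggregate:
//
//   struct P { int x, y; };
//   __is_trivially_constructible(P, const P &)   // true: trivial copy
//   __is_trivially_constructible(P, int)         // false: the invented
//                                                // definition 'P t(create<int>())'
//                                                // is ill-formed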
ExprResult Sema::BuildTypeTrait(TypeTrait Kind, SourceLocation KWLoc,
ArrayRef<TypeSourceInfo *> Args,
SourceLocation RParenLoc) {
bool Dependent = false;
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
if (Args[I]->getType()->isDependentType()) {
Dependent = true;
break;
}
}
bool Value = false;
if (!Dependent)
Value = evaluateTypeTrait(*this, Kind, KWLoc, Args, RParenLoc);
return TypeTraitExpr::Create(Context, Context.BoolTy, KWLoc, Kind,
Args, RParenLoc, Value);
}
ExprResult Sema::ActOnTypeTrait(TypeTrait Kind, SourceLocation KWLoc,
ArrayRef<ParsedType> Args,
SourceLocation RParenLoc) {
SmallVector<TypeSourceInfo *, 4> ConvertedArgs;
ConvertedArgs.reserve(Args.size());
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
TypeSourceInfo *TInfo;
QualType T = GetTypeFromParser(Args[I], &TInfo);
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(T, KWLoc);
ConvertedArgs.push_back(TInfo);
}
return BuildTypeTrait(Kind, KWLoc, ConvertedArgs, RParenLoc);
}
static bool EvaluateBinaryTypeTrait(Sema &Self, BinaryTypeTrait BTT,
QualType LhsT, QualType RhsT,
SourceLocation KeyLoc) {
assert(!LhsT->isDependentType() && !RhsT->isDependentType() &&
"Cannot evaluate traits of dependent types");
switch(BTT) {
case BTT_IsBaseOf: {
// C++0x [meta.rel]p2
// Base is a base class of Derived without regard to cv-qualifiers or
// Base and Derived are not unions and name the same class type without
// regard to cv-qualifiers.
const RecordType *lhsRecord = LhsT->getAs<RecordType>();
if (!lhsRecord) return false;
const RecordType *rhsRecord = RhsT->getAs<RecordType>();
if (!rhsRecord) return false;
assert(Self.Context.hasSameUnqualifiedType(LhsT, RhsT)
== (lhsRecord == rhsRecord));
if (lhsRecord == rhsRecord)
return !lhsRecord->getDecl()->isUnion();
// C++0x [meta.rel]p2:
// If Base and Derived are class types and are different types
// (ignoring possible cv-qualifiers) then Derived shall be a
// complete type.
if (Self.RequireCompleteType(KeyLoc, RhsT,
diag::err_incomplete_type_used_in_type_trait_expr))
return false;
return cast<CXXRecordDecl>(rhsRecord->getDecl())
->isDerivedFrom(cast<CXXRecordDecl>(lhsRecord->getDecl()));
}
case BTT_IsSame:
return Self.Context.hasSameType(LhsT, RhsT);
case BTT_TypeCompatible:
return Self.Context.typesAreCompatible(LhsT.getUnqualifiedType(),
RhsT.getUnqualifiedType());
case BTT_IsConvertible:
case BTT_IsConvertibleTo: {
// C++0x [meta.rel]p4:
// Given the following function prototype:
//
// template <class T>
// typename add_rvalue_reference<T>::type create();
//
// the predicate condition for a template specialization
// is_convertible<From, To> shall be satisfied if and only if
// the return expression in the following code would be
// well-formed, including any implicit conversions to the return
// type of the function:
//
// To test() {
// return create<From>();
// }
//
// Access checking is performed as if in a context unrelated to To and
// From. Only the validity of the immediate context of the expression
// of the return-statement (including conversions to the return type)
// is considered.
//
// We model the initialization as a copy-initialization of a temporary
// of the appropriate type, which for this expression is identical to the
// return statement (since NRVO doesn't apply).
// Functions aren't allowed to return function or array types.
if (RhsT->isFunctionType() || RhsT->isArrayType())
return false;
// A return statement in a void function must have void type.
if (RhsT->isVoidType())
return LhsT->isVoidType();
// A function definition requires a complete, non-abstract return type.
if (Self.RequireCompleteType(KeyLoc, RhsT, 0) ||
Self.RequireNonAbstractType(KeyLoc, RhsT, 0))
return false;
// Compute the result of add_rvalue_reference.
if (LhsT->isObjectType() || LhsT->isFunctionType())
LhsT = Self.Context.getRValueReferenceType(LhsT);
// Build a fake source and destination for initialization.
InitializedEntity To(InitializedEntity::InitializeTemporary(RhsT));
OpaqueValueExpr From(KeyLoc, LhsT.getNonLValueExprType(Self.Context),
Expr::getValueKindForType(LhsT));
Expr *FromPtr = &From;
InitializationKind Kind(InitializationKind::CreateCopy(KeyLoc,
SourceLocation()));
// Perform the initialization in an unevaluated context within a SFINAE
// trap at translation unit scope.
EnterExpressionEvaluationContext Unevaluated(Self, Sema::Unevaluated);
Sema::SFINAETrap SFINAE(Self, /*AccessCheckingSFINAE=*/true);
Sema::ContextRAII TUContext(Self, Self.Context.getTranslationUnitDecl());
InitializationSequence Init(Self, To, Kind, FromPtr);
if (Init.Failed())
return false;
ExprResult Result = Init.Perform(Self, To, Kind, FromPtr);
return !Result.isInvalid() && !SFINAE.hasErrorOccurred();
}
case BTT_IsTriviallyAssignable: {
// C++11 [meta.unary.prop]p3:
// is_trivially_assignable is defined as:
// is_assignable<T, U>::value is true and the assignment, as defined by
// is_assignable, is known to call no operation that is not trivial
//
// is_assignable is defined as:
// The expression declval<T>() = declval<U>() is well-formed when
// treated as an unevaluated operand (Clause 5).
//
// For both, T and U shall be complete types, (possibly cv-qualified)
// void, or arrays of unknown bound.
if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType() &&
Self.RequireCompleteType(KeyLoc, LhsT,
diag::err_incomplete_type_used_in_type_trait_expr))
return false;
if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType() &&
Self.RequireCompleteType(KeyLoc, RhsT,
diag::err_incomplete_type_used_in_type_trait_expr))
return false;
// cv void is never assignable.
if (LhsT->isVoidType() || RhsT->isVoidType())
return false;
// Build expressions that emulate the effect of declval<T>() and
// declval<U>().
if (LhsT->isObjectType() || LhsT->isFunctionType())
LhsT = Self.Context.getRValueReferenceType(LhsT);
if (RhsT->isObjectType() || RhsT->isFunctionType())
RhsT = Self.Context.getRValueReferenceType(RhsT);
OpaqueValueExpr Lhs(KeyLoc, LhsT.getNonLValueExprType(Self.Context),
Expr::getValueKindForType(LhsT));
OpaqueValueExpr Rhs(KeyLoc, RhsT.getNonLValueExprType(Self.Context),
Expr::getValueKindForType(RhsT));
// Attempt the assignment in an unevaluated context within a SFINAE
// trap at translation unit scope.
EnterExpressionEvaluationContext Unevaluated(Self, Sema::Unevaluated);
Sema::SFINAETrap SFINAE(Self, /*AccessCheckingSFINAE=*/true);
Sema::ContextRAII TUContext(Self, Self.Context.getTranslationUnitDecl());
ExprResult Result = Self.BuildBinOp(/*S=*/0, KeyLoc, BO_Assign, &Lhs, &Rhs);
if (Result.isInvalid() || SFINAE.hasErrorOccurred())
return false;
// Under Objective-C ARC, if the destination has non-trivial Objective-C
// lifetime, this is a non-trivial assignment.
if (Self.getLangOpts().ObjCAutoRefCount &&
hasNontrivialObjCLifetime(LhsT.getNonReferenceType()))
return false;
return !Result.get()->hasNonTrivialCall(Self.Context);
}
}
llvm_unreachable("Unknown type trait or not implemented");
}
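// For example, with a hypothetical private base:
//
//   struct B {};
//   struct D : private B {};
//   __is_base_of(B, D)              // true: access is not considered
//   __is_convertible_to(D *, B *)   // false: the pointer conversion is
//                                   // inaccessible, so the invented
//                                   // return statement is ill-formed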
ExprResult Sema::BuildBinaryTypeTrait(BinaryTypeTrait BTT,
SourceLocation KWLoc,
TypeSourceInfo *LhsTSInfo,
TypeSourceInfo *RhsTSInfo,
SourceLocation RParen) {
QualType LhsT = LhsTSInfo->getType();
QualType RhsT = RhsTSInfo->getType();
if (BTT == BTT_TypeCompatible) {
if (getLangOpts().CPlusPlus) {
Diag(KWLoc, diag::err_types_compatible_p_in_cplusplus)
<< SourceRange(KWLoc, RParen);
return ExprError();
}
}
bool Value = false;
if (!LhsT->isDependentType() && !RhsT->isDependentType())
Value = EvaluateBinaryTypeTrait(*this, BTT, LhsT, RhsT, KWLoc);
// Select trait result type.
QualType ResultType;
switch (BTT) {
case BTT_IsBaseOf: ResultType = Context.BoolTy; break;
case BTT_IsConvertible: ResultType = Context.BoolTy; break;
case BTT_IsSame: ResultType = Context.BoolTy; break;
case BTT_TypeCompatible: ResultType = Context.IntTy; break;
case BTT_IsConvertibleTo: ResultType = Context.BoolTy; break;
case BTT_IsTriviallyAssignable: ResultType = Context.BoolTy;
}
return Owned(new (Context) BinaryTypeTraitExpr(KWLoc, BTT, LhsTSInfo,
RhsTSInfo, Value, RParen,
ResultType));
}
ExprResult Sema::ActOnArrayTypeTrait(ArrayTypeTrait ATT,
SourceLocation KWLoc,
ParsedType Ty,
Expr* DimExpr,
SourceLocation RParen) {
TypeSourceInfo *TSInfo;
QualType T = GetTypeFromParser(Ty, &TSInfo);
if (!TSInfo)
TSInfo = Context.getTrivialTypeSourceInfo(T);
return BuildArrayTypeTrait(ATT, KWLoc, TSInfo, DimExpr, RParen);
}
static uint64_t EvaluateArrayTypeTrait(Sema &Self, ArrayTypeTrait ATT,
QualType T, Expr *DimExpr,
SourceLocation KeyLoc) {
assert(!T->isDependentType() && "Cannot evaluate traits of dependent type");
switch(ATT) {
case ATT_ArrayRank:
if (T->isArrayType()) {
unsigned Dim = 0;
while (const ArrayType *AT = Self.Context.getAsArrayType(T)) {
++Dim;
T = AT->getElementType();
}
return Dim;
}
return 0;
case ATT_ArrayExtent: {
llvm::APSInt Value;
uint64_t Dim;
if (Self.VerifyIntegerConstantExpression(DimExpr, &Value,
diag::err_dimension_expr_not_constant_integer,
false).isInvalid())
return 0;
if (Value.isSigned() && Value.isNegative()) {
Self.Diag(KeyLoc, diag::err_dimension_expr_not_constant_integer)
<< DimExpr->getSourceRange();
return 0;
}
Dim = Value.getLimitedValue();
if (T->isArrayType()) {
unsigned D = 0;
bool Matched = false;
while (const ArrayType *AT = Self.Context.getAsArrayType(T)) {
if (Dim == D) {
Matched = true;
break;
}
++D;
T = AT->getElementType();
}
if (Matched && T->isArrayType()) {
if (const ConstantArrayType *CAT = Self.Context.getAsConstantArrayType(T))
return CAT->getSize().getLimitedValue();
}
}
return 0;
}
}
llvm_unreachable("Unknown type trait or not implemented");
}
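// For example:
//
//   __array_rank(int[10][20])        // 2
//   __array_extent(int[10][20], 0)   // 10
//   __array_extent(int[10][20], 1)   // 20
//   __array_extent(int[10][20], 2)   // 0: no such dimension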
ExprResult Sema::BuildArrayTypeTrait(ArrayTypeTrait ATT,
SourceLocation KWLoc,
TypeSourceInfo *TSInfo,
Expr* DimExpr,
SourceLocation RParen) {
QualType T = TSInfo->getType();
// FIXME: This should likely be tracked as an APInt to remove any host
// assumptions about the width of size_t on the target.
uint64_t Value = 0;
if (!T->isDependentType())
Value = EvaluateArrayTypeTrait(*this, ATT, T, DimExpr, KWLoc);
// While the specification for these traits from the Embarcadero C++
// compiler's documentation says the return type is 'unsigned int', Clang
// returns 'size_t'. On Windows, the primary platform for the Embarcadero
// compiler, there is no difference. On several other platforms this is an
// important distinction.
return Owned(new (Context) ArrayTypeTraitExpr(KWLoc, ATT, TSInfo, Value,
DimExpr, RParen,
Context.getSizeType()));
}
ExprResult Sema::ActOnExpressionTrait(ExpressionTrait ET,
SourceLocation KWLoc,
Expr *Queried,
SourceLocation RParen) {
// If there was an error parsing the expression, bail out.
if (!Queried)
return ExprError();
ExprResult Result = BuildExpressionTrait(ET, KWLoc, Queried, RParen);
return Result;
}
static bool EvaluateExpressionTrait(ExpressionTrait ET, Expr *E) {
switch (ET) {
case ET_IsLValueExpr: return E->isLValue();
case ET_IsRValueExpr: return E->isRValue();
}
llvm_unreachable("Expression trait not covered by switch");
}
ExprResult Sema::BuildExpressionTrait(ExpressionTrait ET,
SourceLocation KWLoc,
Expr *Queried,
SourceLocation RParen) {
if (Queried->isTypeDependent()) {
// Delay type-checking for type-dependent expressions.
} else if (Queried->getType()->isPlaceholderType()) {
ExprResult PE = CheckPlaceholderExpr(Queried);
if (PE.isInvalid()) return ExprError();
return BuildExpressionTrait(ET, KWLoc, PE.take(), RParen);
}
bool Value = EvaluateExpressionTrait(ET, Queried);
return Owned(new (Context) ExpressionTraitExpr(KWLoc, ET, Queried, Value,
RParen, Context.BoolTy));
}
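// For example:
//
//   int n;
//   __is_lvalue_expr(n)       // true
//   __is_lvalue_expr(n + 1)   // false: 'n + 1' is a prvalue
//   __is_rvalue_expr(42)      // true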
QualType Sema::CheckPointerToMemberOperands(ExprResult &LHS, ExprResult &RHS,
ExprValueKind &VK,
SourceLocation Loc,
bool isIndirect) {
assert(!LHS.get()->getType()->isPlaceholderType() &&
!RHS.get()->getType()->isPlaceholderType() &&
"placeholders should have been weeded out by now");
// The LHS undergoes lvalue conversions if this is ->*.
if (isIndirect) {
LHS = DefaultLvalueConversion(LHS.take());
if (LHS.isInvalid()) return QualType();
}
// The RHS always undergoes lvalue conversions.
RHS = DefaultLvalueConversion(RHS.take());
if (RHS.isInvalid()) return QualType();
const char *OpSpelling = isIndirect ? "->*" : ".*";
// C++ 5.5p2
// The binary operator .* [p3: ->*] binds its second operand, which shall
// be of type "pointer to member of T" (where T is a completely-defined
// class type) [...]
QualType RHSType = RHS.get()->getType();
const MemberPointerType *MemPtr = RHSType->getAs<MemberPointerType>();
if (!MemPtr) {
Diag(Loc, diag::err_bad_memptr_rhs)
<< OpSpelling << RHSType << RHS.get()->getSourceRange();
return QualType();
}
QualType Class(MemPtr->getClass(), 0);
// Note: C++ [expr.mptr.oper]p2-3 says that the class type into which the
// member pointer points must be completely-defined. However, there is no
// reason for this semantic distinction, and the rule is not enforced by
// other compilers. Therefore, we do not check this property, as it is
// likely to be considered a defect.
// C++ 5.5p2
// [...] to its first operand, which shall be of class T or of a class of
// which T is an unambiguous and accessible base class. [p3: a pointer to
// such a class]
QualType LHSType = LHS.get()->getType();
if (isIndirect) {
if (const PointerType *Ptr = LHSType->getAs<PointerType>())
LHSType = Ptr->getPointeeType();
else {
Diag(Loc, diag::err_bad_memptr_lhs)
<< OpSpelling << 1 << LHSType
<< FixItHint::CreateReplacement(SourceRange(Loc), ".*");
return QualType();
}
}
if (!Context.hasSameUnqualifiedType(Class, LHSType)) {
// If we want to check the hierarchy, we need a complete type.
if (RequireCompleteType(Loc, LHSType, diag::err_bad_memptr_lhs,
OpSpelling, (int)isIndirect)) {
return QualType();
}
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
// FIXME: Would it be useful to print full ambiguity paths, or is that
// overkill?
if (!IsDerivedFrom(LHSType, Class, Paths) ||
Paths.isAmbiguous(Context.getCanonicalType(Class))) {
Diag(Loc, diag::err_bad_memptr_lhs) << OpSpelling
<< (int)isIndirect << LHS.get()->getType();
return QualType();
}
// Cast LHS to type of use.
QualType UseType = isIndirect ? Context.getPointerType(Class) : Class;
ExprValueKind VK = isIndirect ? VK_RValue : LHS.get()->getValueKind();
CXXCastPath BasePath;
BuildBasePathArray(Paths, BasePath);
LHS = ImpCastExprToType(LHS.take(), UseType, CK_DerivedToBase, VK,
&BasePath);
}
if (isa<CXXScalarValueInitExpr>(RHS.get()->IgnoreParens())) {
// Diagnose the use of a pointer-to-member type as the functional cast
// in a pointer-to-member expression.
Diag(Loc, diag::err_pointer_to_member_type) << isIndirect;
return QualType();
}
// C++ 5.5p2
// The result is an object or a function of the type specified by the
// second operand.
// The cv qualifiers are the union of those in the pointer and the left side,
// in accordance with 5.5p5 and 5.2.5.
QualType Result = MemPtr->getPointeeType();
Result = Context.getCVRQualifiedType(Result, LHSType.getCVRQualifiers());
// C++0x [expr.mptr.oper]p6:
// In a .* expression whose object expression is an rvalue, the program is
// ill-formed if the second operand is a pointer to member function with
// ref-qualifier &. In a ->* expression or in a .* expression whose object
// expression is an lvalue, the program is ill-formed if the second operand
// is a pointer to member function with ref-qualifier &&.
if (const FunctionProtoType *Proto = Result->getAs<FunctionProtoType>()) {
switch (Proto->getRefQualifier()) {
case RQ_None:
// Do nothing
break;
case RQ_LValue:
if (!isIndirect && !LHS.get()->Classify(Context).isLValue())
Diag(Loc, diag::err_pointer_to_member_oper_value_classify)
<< RHSType << 1 << LHS.get()->getSourceRange();
break;
case RQ_RValue:
if (isIndirect || !LHS.get()->Classify(Context).isRValue())
Diag(Loc, diag::err_pointer_to_member_oper_value_classify)
<< RHSType << 0 << LHS.get()->getSourceRange();
break;
}
}
// C++ [expr.mptr.oper]p6:
// The result of a .* expression whose second operand is a pointer
// to a data member is of the same value category as its
// first operand. The result of a .* expression whose second
// operand is a pointer to a member function is a prvalue. The
// result of an ->* expression is an lvalue if its second operand
// is a pointer to data member and a prvalue otherwise.
if (Result->isFunctionType()) {
VK = VK_RValue;
return Context.BoundMemberTy;
} else if (isIndirect) {
VK = VK_LValue;
} else {
VK = LHS.get()->getValueKind();
}
return Result;
}
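// For example, the value-category rules above give (with a hypothetical
// class S):
//
//   struct S { int m; void f(); };
//   S s; S *p; int S::*pm = &S::m;
//   s.*pm          // lvalue of type int
//   p->*pm         // lvalue of type int
//   S().*pm        // rvalue: takes the value category of the object
//   (s.*&S::f)()   // a bound member function is only usable as a call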
/// \brief Try to convert a type to another according to C++0x 5.16p3.
///
/// This is part of the parameter validation for the ? operator. If either
/// value operand is a class type, the two operands are attempted to be
/// converted to each other. This function does the conversion in one direction.
/// It returns true if the program is ill-formed and has already been diagnosed
/// as such.
static bool TryClassUnification(Sema &Self, Expr *From, Expr *To,
SourceLocation QuestionLoc,
bool &HaveConversion,
QualType &ToType) {
HaveConversion = false;
ToType = To->getType();
InitializationKind Kind = InitializationKind::CreateCopy(To->getLocStart(),
SourceLocation());
// C++0x 5.16p3
// The process for determining whether an operand expression E1 of type T1
// can be converted to match an operand expression E2 of type T2 is defined
// as follows:
// -- If E2 is an lvalue:
bool ToIsLvalue = To->isLValue();
if (ToIsLvalue) {
// E1 can be converted to match E2 if E1 can be implicitly converted to
// type "lvalue reference to T2", subject to the constraint that in the
// conversion the reference must bind directly to E1.
QualType T = Self.Context.getLValueReferenceType(ToType);
InitializedEntity Entity = InitializedEntity::InitializeTemporary(T);
InitializationSequence InitSeq(Self, Entity, Kind, From);
if (InitSeq.isDirectReferenceBinding()) {
ToType = T;
HaveConversion = true;
return false;
}
if (InitSeq.isAmbiguous())
return InitSeq.Diagnose(Self, Entity, Kind, From);
}
// -- If E2 is an rvalue, or if the conversion above cannot be done:
// -- if E1 and E2 have class type, and the underlying class types are
// the same or one is a base class of the other:
QualType FTy = From->getType();
QualType TTy = To->getType();
const RecordType *FRec = FTy->getAs<RecordType>();
const RecordType *TRec = TTy->getAs<RecordType>();
bool FDerivedFromT = FRec && TRec && FRec != TRec &&
Self.IsDerivedFrom(FTy, TTy);
if (FRec && TRec &&
(FRec == TRec || FDerivedFromT || Self.IsDerivedFrom(TTy, FTy))) {
// E1 can be converted to match E2 if the class of T2 is the
// same type as, or a base class of, the class of T1, and
// [cv2 > cv1].
if (FRec == TRec || FDerivedFromT) {
if (TTy.isAtLeastAsQualifiedAs(FTy)) {
InitializedEntity Entity = InitializedEntity::InitializeTemporary(TTy);
InitializationSequence InitSeq(Self, Entity, Kind, From);
if (InitSeq) {
HaveConversion = true;
return false;
}
if (InitSeq.isAmbiguous())
return InitSeq.Diagnose(Self, Entity, Kind, From);
}
}
return false;
}
// -- Otherwise: E1 can be converted to match E2 if E1 can be
// implicitly converted to the type that expression E2 would have
// if E2 were converted to an rvalue (or the type it has, if E2 is
// an rvalue).
//
// This actually refers very narrowly to the lvalue-to-rvalue conversion, not
// to the array-to-pointer or function-to-pointer conversions.
if (!TTy->getAs<TagType>())
TTy = TTy.getUnqualifiedType();
InitializedEntity Entity = InitializedEntity::InitializeTemporary(TTy);
InitializationSequence InitSeq(Self, Entity, Kind, From);
HaveConversion = !InitSeq.Failed();
ToType = TTy;
if (InitSeq.isAmbiguous())
return InitSeq.Diagnose(Self, Entity, Kind, From);
return false;
}
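// For example, with hypothetical classes:
//
//   struct Base {};
//   struct Derived : Base {};
//   Base b; Derived d;
//   cond ? b : d   // only the Derived operand converts (a direct
//                  // reference binding to Base&); the result is an
//                  // lvalue of type Base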
/// \brief Try to find a common type for two according to C++0x 5.16p5.
///
/// This is part of the parameter validation for the ? operator. If either
/// value operand is a class type, overload resolution is used to find a
/// conversion to a common type.
static bool FindConditionalOverload(Sema &Self, ExprResult &LHS, ExprResult &RHS,
SourceLocation QuestionLoc) {
Expr *Args[2] = { LHS.get(), RHS.get() };
OverloadCandidateSet CandidateSet(QuestionLoc);
Self.AddBuiltinOperatorCandidates(OO_Conditional, QuestionLoc, Args,
CandidateSet);
OverloadCandidateSet::iterator Best;
switch (CandidateSet.BestViableFunction(Self, QuestionLoc, Best)) {
case OR_Success: {
// We found a match. Perform the conversions on the arguments and move on.
ExprResult LHSRes =
Self.PerformImplicitConversion(LHS.get(), Best->BuiltinTypes.ParamTypes[0],
Best->Conversions[0], Sema::AA_Converting);
if (LHSRes.isInvalid())
break;
LHS = LHSRes;
ExprResult RHSRes =
Self.PerformImplicitConversion(RHS.get(), Best->BuiltinTypes.ParamTypes[1],
Best->Conversions[1], Sema::AA_Converting);
if (RHSRes.isInvalid())
break;
RHS = RHSRes;
if (Best->Function)
Self.MarkFunctionReferenced(QuestionLoc, Best->Function);
return false;
}
case OR_No_Viable_Function:
// Emit a better diagnostic if one of the expressions is a null pointer
// constant and the other is a pointer type. In this case, the user most
// likely forgot to take the address of the other expression.
if (Self.DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc))
return true;
Self.Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return true;
case OR_Ambiguous:
Self.Diag(QuestionLoc, diag::err_conditional_ambiguous_ovl)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
// FIXME: Print the possible common types by printing the return types of
// the viable candidates.
break;
case OR_Deleted:
llvm_unreachable("Conditional operator has only built-in overloads");
}
return true;
}
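// For example, with a hypothetical class convertible to an arithmetic
// type:
//
//   struct IntLike { operator int() const; };
//   IntLike a;
//   cond ? a : 5   // the built-in (int, int) candidate is selected and
//                  // 'a' is converted via operator int()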
/// \brief Perform an "extended" implicit conversion as returned by
/// TryClassUnification.
static bool ConvertForConditional(Sema &Self, ExprResult &E, QualType T) {
InitializedEntity Entity = InitializedEntity::InitializeTemporary(T);
InitializationKind Kind = InitializationKind::CreateCopy(E.get()->getLocStart(),
SourceLocation());
Expr *Arg = E.take();
InitializationSequence InitSeq(Self, Entity, Kind, Arg);
ExprResult Result = InitSeq.Perform(Self, Entity, Kind, Arg);
if (Result.isInvalid())
return true;
E = Result;
return false;
}
/// \brief Check the operands of ?: under C++ semantics.
///
/// See C++ [expr.cond]. Note that LHS is never null, even for the GNU x ?: y
/// extension. In this case, LHS == Cond. (But they're not aliases.)
QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
ExprResult &RHS, ExprValueKind &VK,
ExprObjectKind &OK,
SourceLocation QuestionLoc) {
// FIXME: Handle C99's complex types, vector types, block pointers and Obj-C++
// interface pointers.
// C++11 [expr.cond]p1
// The first expression is contextually converted to bool.
if (!Cond.get()->isTypeDependent()) {
ExprResult CondRes = CheckCXXBooleanCondition(Cond.take());
if (CondRes.isInvalid())
return QualType();
Cond = CondRes;
}
// Assume r-value.
VK = VK_RValue;
OK = OK_Ordinary;
// Is either of the arguments type-dependent?
if (LHS.get()->isTypeDependent() || RHS.get()->isTypeDependent())
return Context.DependentTy;
// C++11 [expr.cond]p2
// If either the second or the third operand has type (cv) void, ...
QualType LTy = LHS.get()->getType();
QualType RTy = RHS.get()->getType();
bool LVoid = LTy->isVoidType();
bool RVoid = RTy->isVoidType();
if (LVoid || RVoid) {
// ... then the [l2r] conversions are performed on the second and third
// operands ...
LHS = DefaultFunctionArrayLvalueConversion(LHS.take());
RHS = DefaultFunctionArrayLvalueConversion(RHS.take());
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
// Finish off the lvalue-to-rvalue conversion by copy-initializing a
// temporary if necessary. DefaultFunctionArrayLvalueConversion doesn't
// do this part for us.
ExprResult &NonVoid = LVoid ? RHS : LHS;
if (NonVoid.get()->getType()->isRecordType() &&
NonVoid.get()->isGLValue()) {
if (RequireNonAbstractType(QuestionLoc, NonVoid.get()->getType(),
diag::err_allocation_of_abstract_type))
return QualType();
InitializedEntity Entity =
InitializedEntity::InitializeTemporary(NonVoid.get()->getType());
NonVoid = PerformCopyInitialization(Entity, SourceLocation(), NonVoid);
if (NonVoid.isInvalid())
return QualType();
}
LTy = LHS.get()->getType();
RTy = RHS.get()->getType();
// ... and one of the following shall hold:
// -- The second or the third operand (but not both) is a throw-
// expression; the result is of the type of the other and is a prvalue.
bool LThrow = isa<CXXThrowExpr>(LHS.get()->IgnoreParenCasts());
bool RThrow = isa<CXXThrowExpr>(RHS.get()->IgnoreParenCasts());
if (LThrow && !RThrow)
return RTy;
if (RThrow && !LThrow)
return LTy;
// -- Both the second and third operands have type void; the result is of
// type void and is a prvalue.
if (LVoid && RVoid)
return Context.VoidTy;
// Neither holds, error.
Diag(QuestionLoc, diag::err_conditional_void_nonvoid)
<< (LVoid ? RTy : LTy) << (LVoid ? 0 : 1)
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
// Neither is void.
// C++11 [expr.cond]p3
// Otherwise, if the second and third operand have different types, and
// either has (cv) class type [...] an attempt is made to convert each of
// those operands to the type of the other.
if (!Context.hasSameType(LTy, RTy) &&
(LTy->isRecordType() || RTy->isRecordType())) {
ImplicitConversionSequence ICSLeftToRight, ICSRightToLeft;
// These return true if a single direction is already ambiguous.
QualType L2RType, R2LType;
bool HaveL2R, HaveR2L;
if (TryClassUnification(*this, LHS.get(), RHS.get(), QuestionLoc, HaveL2R, L2RType))
return QualType();
if (TryClassUnification(*this, RHS.get(), LHS.get(), QuestionLoc, HaveR2L, R2LType))
return QualType();
// If both can be converted, [...] the program is ill-formed.
if (HaveL2R && HaveR2L) {
Diag(QuestionLoc, diag::err_conditional_ambiguous)
<< LTy << RTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
// If exactly one conversion is possible, that conversion is applied to
// the chosen operand and the converted operands are used in place of the
// original operands for the remainder of this section.
if (HaveL2R) {
if (ConvertForConditional(*this, LHS, L2RType) || LHS.isInvalid())
return QualType();
LTy = LHS.get()->getType();
} else if (HaveR2L) {
if (ConvertForConditional(*this, RHS, R2LType) || RHS.isInvalid())
return QualType();
RTy = RHS.get()->getType();
}
}
// C++11 [expr.cond]p3
// if both are glvalues of the same value category and the same type except
// for cv-qualification, an attempt is made to convert each of those
// operands to the type of the other.
ExprValueKind LVK = LHS.get()->getValueKind();
ExprValueKind RVK = RHS.get()->getValueKind();
if (!Context.hasSameType(LTy, RTy) &&
Context.hasSameUnqualifiedType(LTy, RTy) &&
LVK == RVK && LVK != VK_RValue) {
// Since the unqualified types are reference-related and we require the
// result to be as if a reference bound directly, the only conversion
// we can perform is to add cv-qualifiers.
Qualifiers LCVR = Qualifiers::fromCVRMask(LTy.getCVRQualifiers());
Qualifiers RCVR = Qualifiers::fromCVRMask(RTy.getCVRQualifiers());
if (RCVR.isStrictSupersetOf(LCVR)) {
LHS = ImpCastExprToType(LHS.take(), RTy, CK_NoOp, LVK);
LTy = LHS.get()->getType();
}
else if (LCVR.isStrictSupersetOf(RCVR)) {
RHS = ImpCastExprToType(RHS.take(), LTy, CK_NoOp, RVK);
RTy = RHS.get()->getType();
}
}
// C++11 [expr.cond]p4
// If the second and third operands are glvalues of the same value
// category and have the same type, the result is of that type and
// value category and it is a bit-field if the second or the third
// operand is a bit-field, or if both are bit-fields.
// We only extend this to bitfields, not to the crazy other kinds of
// l-values.
bool Same = Context.hasSameType(LTy, RTy);
if (Same && LVK == RVK && LVK != VK_RValue &&
LHS.get()->isOrdinaryOrBitFieldObject() &&
RHS.get()->isOrdinaryOrBitFieldObject()) {
VK = LHS.get()->getValueKind();
if (LHS.get()->getObjectKind() == OK_BitField ||
RHS.get()->getObjectKind() == OK_BitField)
OK = OK_BitField;
return LTy;
}
// C++11 [expr.cond]p5
// Otherwise, the result is a prvalue. If the second and third operands
// do not have the same type, and either has (cv) class type, ...
if (!Same && (LTy->isRecordType() || RTy->isRecordType())) {
// ... overload resolution is used to determine the conversions (if any)
// to be applied to the operands. If the overload resolution fails, the
// program is ill-formed.
if (FindConditionalOverload(*this, LHS, RHS, QuestionLoc))
return QualType();
}
// C++11 [expr.cond]p6
// Lvalue-to-rvalue, array-to-pointer, and function-to-pointer standard
// conversions are performed on the second and third operands.
LHS = DefaultFunctionArrayLvalueConversion(LHS.take());
RHS = DefaultFunctionArrayLvalueConversion(RHS.take());
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
LTy = LHS.get()->getType();
RTy = RHS.get()->getType();
// After those conversions, one of the following shall hold:
// -- The second and third operands have the same type; the result
// is of that type. If the operands have class type, the result
// is a prvalue temporary of the result type, which is
// copy-initialized from either the second operand or the third
// operand depending on the value of the first operand.
if (Context.getCanonicalType(LTy) == Context.getCanonicalType(RTy)) {
if (LTy->isRecordType()) {
// The operands have class type. Make a temporary copy.
if (RequireNonAbstractType(QuestionLoc, LTy,
diag::err_allocation_of_abstract_type))
return QualType();
InitializedEntity Entity = InitializedEntity::InitializeTemporary(LTy);
ExprResult LHSCopy = PerformCopyInitialization(Entity,
SourceLocation(),
LHS);
if (LHSCopy.isInvalid())
return QualType();
ExprResult RHSCopy = PerformCopyInitialization(Entity,
SourceLocation(),
RHS);
if (RHSCopy.isInvalid())
return QualType();
LHS = LHSCopy;
RHS = RHSCopy;
}
return LTy;
}
// Extension: conditional operator involving vector types.
if (LTy->isVectorType() || RTy->isVectorType())
return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false);
// -- The second and third operands have arithmetic or enumeration type;
// the usual arithmetic conversions are performed to bring them to a
// common type, and the result is of that type.
if (LTy->isArithmeticType() && RTy->isArithmeticType()) {
UsualArithmeticConversions(LHS, RHS);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
return LHS.get()->getType();
}
// -- The second and third operands have pointer type, or one has pointer
// type and the other is a null pointer constant, or both are null
// pointer constants, at least one of which is non-integral; pointer
// conversions and qualification conversions are performed to bring them
// to their composite pointer type. The result is of the composite
// pointer type.
// -- The second and third operands have pointer to member type, or one has
// pointer to member type and the other is a null pointer constant;
// pointer to member conversions and qualification conversions are
// performed to bring them to a common type, whose cv-qualification
// shall match the cv-qualification of either the second or the third
// operand. The result is of the common type.
bool NonStandardCompositeType = false;
QualType Composite = FindCompositePointerType(QuestionLoc, LHS, RHS,
isSFINAEContext()? 0 : &NonStandardCompositeType);
if (!Composite.isNull()) {
if (NonStandardCompositeType)
Diag(QuestionLoc,
diag::ext_typecheck_cond_incompatible_operands_nonstandard)
<< LTy << RTy << Composite
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return Composite;
}
// Similarly, attempt to find composite type of two objective-c pointers.
Composite = FindCompositeObjCPointerType(LHS, RHS, QuestionLoc);
if (!Composite.isNull())
return Composite;
// Check if we are using a null with a non-pointer type.
if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc))
return QualType();
Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
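// Illustrative sketch (not part of this diff): the C++11 [expr.cond] rules
// implemented above, as seen from hypothetical user code:
//   struct B {};
//   struct D : B {};
//   void demo(bool c, D d, B b) {
//     B &r = c ? d : b;          // D lvalue binds to B&; result is lvalue B
//     int i = c ? 1 : throw 0;   // one throw-expression; result type is int
//     c ? (void)0 : (void)1;     // both operands void; result type is void
//   }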
/// \brief Find a merged pointer type and convert the two expressions to it.
///
/// This finds the composite pointer type (or member pointer type) for @p E1
/// and @p E2 according to C++11 5.9p2. It converts both expressions to this
/// type and returns it.
/// It does not emit diagnostics.
///
/// \param Loc The location of the operator requiring these two expressions to
/// be converted to the composite pointer type.
///
/// If \p NonStandardCompositeType is non-NULL, then we are permitted to find
/// a non-standard (but still sane) composite type to which both expressions
/// can be converted. When such a type is chosen, \c *NonStandardCompositeType
/// will be set true.
QualType Sema::FindCompositePointerType(SourceLocation Loc,
Expr *&E1, Expr *&E2,
bool *NonStandardCompositeType) {
if (NonStandardCompositeType)
*NonStandardCompositeType = false;
assert(getLangOpts().CPlusPlus && "This function assumes C++");
QualType T1 = E1->getType(), T2 = E2->getType();
// C++11 5.9p2
// Pointer conversions and qualification conversions are performed on
// pointer operands to bring them to their composite pointer type. If
// one operand is a null pointer constant, the composite pointer type is
// std::nullptr_t if the other operand is also a null pointer constant or,
// if the other operand is a pointer, the type of the other operand.
if (!T1->isAnyPointerType() && !T1->isMemberPointerType() &&
!T2->isAnyPointerType() && !T2->isMemberPointerType()) {
if (T1->isNullPtrType() &&
E2->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
E2 = ImpCastExprToType(E2, T1, CK_NullToPointer).take();
return T1;
}
if (T2->isNullPtrType() &&
E1->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
E1 = ImpCastExprToType(E1, T2, CK_NullToPointer).take();
return T2;
}
return QualType();
}
if (E1->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
if (T2->isMemberPointerType())
E1 = ImpCastExprToType(E1, T2, CK_NullToMemberPointer).take();
else
E1 = ImpCastExprToType(E1, T2, CK_NullToPointer).take();
return T2;
}
if (E2->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
if (T1->isMemberPointerType())
E2 = ImpCastExprToType(E2, T1, CK_NullToMemberPointer).take();
else
E2 = ImpCastExprToType(E2, T1, CK_NullToPointer).take();
return T1;
}
// Now both have to be pointers or member pointers.
if ((!T1->isPointerType() && !T1->isMemberPointerType()) ||
(!T2->isPointerType() && !T2->isMemberPointerType()))
return QualType();
// Otherwise, if one of the operands has type "pointer to cv1 void," then
// the other has type "pointer to cv2 T" and the composite pointer type is
// "pointer to cv12 void," where cv12 is the union of cv1 and cv2.
// Otherwise, the composite pointer type is a pointer type similar to the
// type of one of the operands, with a cv-qualification signature that is
// the union of the cv-qualification signatures of the operand types.
// In practice, the first part here is redundant; it's subsumed by the second.
// What we do here is, we build the two possible composite types, and try the
// conversions in both directions. If only one works, or if the two composite
// types are the same, we have succeeded.
// FIXME: extended qualifiers?
typedef SmallVector<unsigned, 4> QualifierVector;
QualifierVector QualifierUnion;
typedef SmallVector<std::pair<const Type *, const Type *>, 4>
ContainingClassVector;
ContainingClassVector MemberOfClass;
QualType Composite1 = Context.getCanonicalType(T1),
Composite2 = Context.getCanonicalType(T2);
unsigned NeedConstBefore = 0;
do {
const PointerType *Ptr1, *Ptr2;
if ((Ptr1 = Composite1->getAs<PointerType>()) &&
(Ptr2 = Composite2->getAs<PointerType>())) {
Composite1 = Ptr1->getPointeeType();
Composite2 = Ptr2->getPointeeType();
// If we're allowed to create a non-standard composite type, keep track
// of where we need to fill in additional 'const' qualifiers.
if (NonStandardCompositeType &&
Composite1.getCVRQualifiers() != Composite2.getCVRQualifiers())
NeedConstBefore = QualifierUnion.size();
QualifierUnion.push_back(
Composite1.getCVRQualifiers() | Composite2.getCVRQualifiers());
MemberOfClass.push_back(std::make_pair((const Type *)0, (const Type *)0));
continue;
}
const MemberPointerType *MemPtr1, *MemPtr2;
if ((MemPtr1 = Composite1->getAs<MemberPointerType>()) &&
(MemPtr2 = Composite2->getAs<MemberPointerType>())) {
Composite1 = MemPtr1->getPointeeType();
Composite2 = MemPtr2->getPointeeType();
// If we're allowed to create a non-standard composite type, keep track
// of where we need to fill in additional 'const' qualifiers.
if (NonStandardCompositeType &&
Composite1.getCVRQualifiers() != Composite2.getCVRQualifiers())
NeedConstBefore = QualifierUnion.size();
QualifierUnion.push_back(
Composite1.getCVRQualifiers() | Composite2.getCVRQualifiers());
MemberOfClass.push_back(std::make_pair(MemPtr1->getClass(),
MemPtr2->getClass()));
continue;
}
// FIXME: block pointer types?
// Cannot unwrap any more types.
break;
} while (true);
if (NeedConstBefore && NonStandardCompositeType) {
// Extension: Add 'const' to qualifiers that come before the first qualifier
// mismatch, so that our (non-standard!) composite type meets the
// requirements of C++ [conv.qual]p4 bullet 3.
for (unsigned I = 0; I != NeedConstBefore; ++I) {
if ((QualifierUnion[I] & Qualifiers::Const) == 0) {
QualifierUnion[I] = QualifierUnion[I] | Qualifiers::Const;
*NonStandardCompositeType = true;
}
}
}
// Rewrap the composites as pointers or member pointers with the union CVRs.
ContainingClassVector::reverse_iterator MOC
= MemberOfClass.rbegin();
for (QualifierVector::reverse_iterator
I = QualifierUnion.rbegin(),
E = QualifierUnion.rend();
I != E; (void)++I, ++MOC) {
Qualifiers Quals = Qualifiers::fromCVRMask(*I);
if (MOC->first && MOC->second) {
// Rebuild member pointer type
Composite1 = Context.getMemberPointerType(
Context.getQualifiedType(Composite1, Quals),
MOC->first);
Composite2 = Context.getMemberPointerType(
Context.getQualifiedType(Composite2, Quals),
MOC->second);
} else {
// Rebuild pointer type
Composite1
= Context.getPointerType(Context.getQualifiedType(Composite1, Quals));
Composite2
= Context.getPointerType(Context.getQualifiedType(Composite2, Quals));
}
}
// Try to convert to the first composite pointer type.
InitializedEntity Entity1
= InitializedEntity::InitializeTemporary(Composite1);
InitializationKind Kind
= InitializationKind::CreateCopy(Loc, SourceLocation());
InitializationSequence E1ToC1(*this, Entity1, Kind, E1);
InitializationSequence E2ToC1(*this, Entity1, Kind, E2);
if (E1ToC1 && E2ToC1) {
// Conversion to Composite1 is viable.
if (!Context.hasSameType(Composite1, Composite2)) {
// Composite2 is a different type from Composite1. Check whether
// Composite2 is also viable.
InitializedEntity Entity2
= InitializedEntity::InitializeTemporary(Composite2);
InitializationSequence E1ToC2(*this, Entity2, Kind, E1);
InitializationSequence E2ToC2(*this, Entity2, Kind, E2);
if (E1ToC2 && E2ToC2) {
// Both Composite1 and Composite2 are viable and are different;
// this is an ambiguity.
return QualType();
}
}
// Convert E1 to Composite1
ExprResult E1Result
= E1ToC1.Perform(*this, Entity1, Kind, E1);
if (E1Result.isInvalid())
return QualType();
E1 = E1Result.takeAs<Expr>();
// Convert E2 to Composite1
ExprResult E2Result
= E2ToC1.Perform(*this, Entity1, Kind, E2);
if (E2Result.isInvalid())
return QualType();
E2 = E2Result.takeAs<Expr>();
return Composite1;
}
// Check whether Composite2 is viable.
InitializedEntity Entity2
= InitializedEntity::InitializeTemporary(Composite2);
InitializationSequence E1ToC2(*this, Entity2, Kind, E1);
InitializationSequence E2ToC2(*this, Entity2, Kind, E2);
if (!E1ToC2 || !E2ToC2)
return QualType();
// Convert E1 to Composite2
ExprResult E1Result
= E1ToC2.Perform(*this, Entity2, Kind, E1);
if (E1Result.isInvalid())
return QualType();
E1 = E1Result.takeAs<Expr>();
// Convert E2 to Composite2
ExprResult E2Result
= E2ToC2.Perform(*this, Entity2, Kind, E2);
if (E2Result.isInvalid())
return QualType();
E2 = E2Result.takeAs<Expr>();
return Composite2;
}
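// Illustrative sketch (not part of this diff): composite pointer types per
// C++11 5.9p2, as computed above for hypothetical operands of ?:
//   void demo(bool c, int *p, const int *q, volatile int *r) {
//     auto a = c ? p : q;        // composite type: const int *
//     auto b = c ? p : nullptr;  // null pointer constant; type is int *
//     auto d = c ? q : r;        // composite type: const volatile int *
//   }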
ExprResult Sema::MaybeBindToTemporary(Expr *E) {
if (!E)
return ExprError();
assert(!isa<CXXBindTemporaryExpr>(E) && "Double-bound temporary?");
// If the result is a glvalue, we shouldn't bind it.
if (!E->isRValue())
return Owned(E);
// In ARC, calls that return a retainable type can return retained,
// in which case we have to insert a consuming cast.
if (getLangOpts().ObjCAutoRefCount &&
E->getType()->isObjCRetainableType()) {
bool ReturnsRetained;
// For actual calls, we compute this by examining the type of the
// called value.
if (CallExpr *Call = dyn_cast<CallExpr>(E)) {
Expr *Callee = Call->getCallee()->IgnoreParens();
QualType T = Callee->getType();
if (T == Context.BoundMemberTy) {
// Handle pointer-to-members.
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Callee))
T = BinOp->getRHS()->getType();
else if (MemberExpr *Mem = dyn_cast<MemberExpr>(Callee))
T = Mem->getMemberDecl()->getType();
}
if (const PointerType *Ptr = T->getAs<PointerType>())
T = Ptr->getPointeeType();
else if (const BlockPointerType *Ptr = T->getAs<BlockPointerType>())
T = Ptr->getPointeeType();
else if (const MemberPointerType *MemPtr = T->getAs<MemberPointerType>())
T = MemPtr->getPointeeType();
const FunctionType *FTy = T->getAs<FunctionType>();
assert(FTy && "call to value not of function type?");
ReturnsRetained = FTy->getExtInfo().getProducesResult();
// ActOnStmtExpr arranges things so that StmtExprs of retainable
// type always produce a +1 object.
} else if (isa<StmtExpr>(E)) {
ReturnsRetained = true;
// We hit this case with the lambda conversion-to-block optimization;
// we don't want any extra casts here.
} else if (isa<CastExpr>(E) &&
isa<BlockExpr>(cast<CastExpr>(E)->getSubExpr())) {
return Owned(E);
// For message sends and property references, we try to find an
// actual method. FIXME: we should infer retention by selector in
// cases where we don't have an actual method.
} else {
ObjCMethodDecl *D = 0;
if (ObjCMessageExpr *Send = dyn_cast<ObjCMessageExpr>(E)) {
D = Send->getMethodDecl();
} else if (ObjCBoxedExpr *BoxedExpr = dyn_cast<ObjCBoxedExpr>(E)) {
D = BoxedExpr->getBoxingMethod();
} else if (ObjCArrayLiteral *ArrayLit = dyn_cast<ObjCArrayLiteral>(E)) {
D = ArrayLit->getArrayWithObjectsMethod();
} else if (ObjCDictionaryLiteral *DictLit
= dyn_cast<ObjCDictionaryLiteral>(E)) {
D = DictLit->getDictWithObjectsMethod();
}
ReturnsRetained = (D && D->hasAttr<NSReturnsRetainedAttr>());
// Don't do reclaims on performSelector calls; despite their
// return type, the invoked method doesn't necessarily actually
// return an object.
if (!ReturnsRetained &&
D && D->getMethodFamily() == OMF_performSelector)
return Owned(E);
}
// Don't reclaim an object of Class type.
if (!ReturnsRetained && E->getType()->isObjCARCImplicitlyUnretainedType())
return Owned(E);
ExprNeedsCleanups = true;
CastKind ck = (ReturnsRetained ? CK_ARCConsumeObject
: CK_ARCReclaimReturnedObject);
return Owned(ImplicitCastExpr::Create(Context, E->getType(), ck, E, 0,
VK_RValue));
}
if (!getLangOpts().CPlusPlus)
return Owned(E);
// Search for the base element type (cf. ASTContext::getBaseElementType) with
// a fast path for the common case that the type is directly a RecordType.
const Type *T = Context.getCanonicalType(E->getType().getTypePtr());
const RecordType *RT = 0;
while (!RT) {
switch (T->getTypeClass()) {
case Type::Record:
RT = cast<RecordType>(T);
break;
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
case Type::DependentSizedArray:
T = cast<ArrayType>(T)->getElementType().getTypePtr();
break;
default:
return Owned(E);
}
}
// That should be enough to guarantee that this type is complete, if we're
// not processing a decltype expression.
CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
if (RD->isInvalidDecl() || RD->isDependentContext())
return Owned(E);
bool IsDecltype = ExprEvalContexts.back().IsDecltype;
CXXDestructorDecl *Destructor = IsDecltype ? 0 : LookupDestructor(RD);
if (Destructor) {
MarkFunctionReferenced(E->getExprLoc(), Destructor);
CheckDestructorAccess(E->getExprLoc(), Destructor,
PDiag(diag::err_access_dtor_temp)
<< E->getType());
if (DiagnoseUseOfDecl(Destructor, E->getExprLoc()))
return ExprError();
// If destructor is trivial, we can avoid the extra copy.
if (Destructor->isTrivial())
return Owned(E);
// We need a cleanup, but we don't need to remember the temporary.
ExprNeedsCleanups = true;
}
CXXTemporary *Temp = CXXTemporary::Create(Context, Destructor);
CXXBindTemporaryExpr *Bind = CXXBindTemporaryExpr::Create(Context, Temp, E);
if (IsDecltype)
ExprEvalContexts.back().DelayedDecltypeBinds.push_back(Bind);
return Owned(Bind);
}
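// Illustrative sketch (not part of this diff): the user-visible effect of
// binding a class-type prvalue to a temporary - its destructor runs at the
// end of the enclosing full-expression:
//   struct Guard { ~Guard(); };
//   Guard make();
//   void demo() {
//     make();  // temporary bound here; ~Guard() runs at the ';'
//   }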
ExprResult
Sema::MaybeCreateExprWithCleanups(ExprResult SubExpr) {
if (SubExpr.isInvalid())
return ExprError();
return Owned(MaybeCreateExprWithCleanups(SubExpr.take()));
}
Expr *Sema::MaybeCreateExprWithCleanups(Expr *SubExpr) {
assert(SubExpr && "sub expression can't be null!");
CleanupVarDeclMarking();
unsigned FirstCleanup = ExprEvalContexts.back().NumCleanupObjects;
assert(ExprCleanupObjects.size() >= FirstCleanup);
assert(ExprNeedsCleanups || ExprCleanupObjects.size() == FirstCleanup);
if (!ExprNeedsCleanups)
return SubExpr;
ArrayRef<ExprWithCleanups::CleanupObject> Cleanups
= llvm::makeArrayRef(ExprCleanupObjects.begin() + FirstCleanup,
ExprCleanupObjects.size() - FirstCleanup);
Expr *E = ExprWithCleanups::Create(Context, SubExpr, Cleanups);
DiscardCleanupsInEvaluationContext();
return E;
}
Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) {
assert(SubStmt && "sub statement can't be null!");
CleanupVarDeclMarking();
if (!ExprNeedsCleanups)
return SubStmt;
// FIXME: In order to attach the temporaries, wrap the statement into
// a StmtExpr; currently this is only used for asm statements.
// This is hacky, either create a new CXXStmtWithTemporaries statement or
// a new AsmStmtWithTemporaries.
CompoundStmt *CompStmt = new (Context) CompoundStmt(Context, SubStmt,
SourceLocation(),
SourceLocation());
Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(),
SourceLocation());
return MaybeCreateExprWithCleanups(E);
}
/// Process the expression contained within a decltype. For such expressions,
/// certain semantic checks on temporaries are delayed until this point, and
/// are omitted for the 'topmost' call in the decltype expression. If the
/// topmost call bound a temporary, strip that temporary off the expression.
ExprResult Sema::ActOnDecltypeExpression(Expr *E) {
assert(ExprEvalContexts.back().IsDecltype && "not in a decltype expression");
// C++11 [expr.call]p11:
// If a function call is a prvalue of object type,
// -- if the function call is either
// -- the operand of a decltype-specifier, or
// -- the right operand of a comma operator that is the operand of a
// decltype-specifier,
// a temporary object is not introduced for the prvalue.
// Recursively rebuild ParenExprs and comma expressions to strip out the
// outermost CXXBindTemporaryExpr, if any.
if (ParenExpr *PE = dyn_cast<ParenExpr>(E)) {
ExprResult SubExpr = ActOnDecltypeExpression(PE->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (SubExpr.get() == PE->getSubExpr())
return Owned(E);
return ActOnParenExpr(PE->getLParen(), PE->getRParen(), SubExpr.take());
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
if (BO->getOpcode() == BO_Comma) {
ExprResult RHS = ActOnDecltypeExpression(BO->getRHS());
if (RHS.isInvalid())
return ExprError();
if (RHS.get() == BO->getRHS())
return Owned(E);
return Owned(new (Context) BinaryOperator(BO->getLHS(), RHS.take(),
BO_Comma, BO->getType(),
BO->getValueKind(),
BO->getObjectKind(),
BO->getOperatorLoc(),
BO->isFPContractable()));
}
}
CXXBindTemporaryExpr *TopBind = dyn_cast<CXXBindTemporaryExpr>(E);
if (TopBind)
E = TopBind->getSubExpr();
// Disable the special decltype handling now.
ExprEvalContexts.back().IsDecltype = false;
// In MS mode, don't perform any extra checking of call return types within a
// decltype expression.
if (getLangOpts().MicrosoftMode)
return Owned(E);
// Perform the semantic checks we delayed until this point.
CallExpr *TopCall = dyn_cast<CallExpr>(E);
for (unsigned I = 0, N = ExprEvalContexts.back().DelayedDecltypeCalls.size();
I != N; ++I) {
CallExpr *Call = ExprEvalContexts.back().DelayedDecltypeCalls[I];
if (Call == TopCall)
continue;
if (CheckCallReturnType(Call->getCallReturnType(),
Call->getLocStart(),
Call, Call->getDirectCallee()))
return ExprError();
}
// Now all relevant types are complete, check the destructors are accessible
// and non-deleted, and annotate them on the temporaries.
for (unsigned I = 0, N = ExprEvalContexts.back().DelayedDecltypeBinds.size();
I != N; ++I) {
CXXBindTemporaryExpr *Bind =
ExprEvalContexts.back().DelayedDecltypeBinds[I];
if (Bind == TopBind)
continue;
CXXTemporary *Temp = Bind->getTemporary();
CXXRecordDecl *RD =
Bind->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
CXXDestructorDecl *Destructor = LookupDestructor(RD);
Temp->setDestructor(Destructor);
MarkFunctionReferenced(Bind->getExprLoc(), Destructor);
CheckDestructorAccess(Bind->getExprLoc(), Destructor,
PDiag(diag::err_access_dtor_temp)
<< Bind->getType());
if (DiagnoseUseOfDecl(Destructor, Bind->getExprLoc()))
return ExprError();
// We need a cleanup, but we don't need to remember the temporary.
ExprNeedsCleanups = true;
}
// Possibly strip off the top CXXBindTemporaryExpr.
return Owned(E);
}
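// Illustrative sketch (not part of this diff): per C++11 [expr.call]p11, a
// prvalue function call that is the operand of decltype introduces no
// temporary, so its type need not be complete or destructible:
//   struct Incomplete;             // hypothetical; never defined
//   Incomplete get();
//   decltype(get()) *p = nullptr;  // OK: no temporary is created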
/// Note a set of 'operator->' functions that were used for a member access.
static void noteOperatorArrows(Sema &S,
llvm::ArrayRef<FunctionDecl *> OperatorArrows) {
unsigned SkipStart = OperatorArrows.size(), SkipCount = 0;
// FIXME: Make this configurable?
unsigned Limit = 9;
if (OperatorArrows.size() > Limit) {
// Produce Limit-1 normal notes and one 'skipping' note.
SkipStart = (Limit - 1) / 2 + (Limit - 1) % 2;
SkipCount = OperatorArrows.size() - (Limit - 1);
}
for (unsigned I = 0; I < OperatorArrows.size(); /**/) {
if (I == SkipStart) {
S.Diag(OperatorArrows[I]->getLocation(),
diag::note_operator_arrows_suppressed)
<< SkipCount;
I += SkipCount;
} else {
S.Diag(OperatorArrows[I]->getLocation(), diag::note_operator_arrow_here)
<< OperatorArrows[I]->getCallResultType();
++I;
}
}
}
ExprResult
Sema::ActOnStartCXXMemberReference(Scope *S, Expr *Base, SourceLocation OpLoc,
tok::TokenKind OpKind, ParsedType &ObjectType,
bool &MayBePseudoDestructor) {
// Since this might be a postfix expression, get rid of ParenListExprs.
ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Base);
if (Result.isInvalid()) return ExprError();
Base = Result.get();
Result = CheckPlaceholderExpr(Base);
if (Result.isInvalid()) return ExprError();
Base = Result.take();
QualType BaseType = Base->getType();
MayBePseudoDestructor = false;
if (BaseType->isDependentType()) {
// If we have a pointer to a dependent type and are using the -> operator,
// the object type is the type that the pointer points to. We might still
// have enough information about that type to do something useful.
if (OpKind == tok::arrow)
if (const PointerType *Ptr = BaseType->getAs<PointerType>())
BaseType = Ptr->getPointeeType();
ObjectType = ParsedType::make(BaseType);
MayBePseudoDestructor = true;
return Owned(Base);
}
// C++ [over.match.oper]p8:
// [...] When operator-> returns, the operator-> is applied to the value
// returned, with the original second operand.
if (OpKind == tok::arrow) {
QualType StartingType = BaseType;
bool NoArrowOperatorFound = false;
bool FirstIteration = true;
FunctionDecl *CurFD = dyn_cast<FunctionDecl>(CurContext);
// The set of types we've considered so far.
llvm::SmallPtrSet<CanQualType,8> CTypes;
SmallVector<FunctionDecl*, 8> OperatorArrows;
CTypes.insert(Context.getCanonicalType(BaseType));
while (BaseType->isRecordType()) {
if (OperatorArrows.size() >= getLangOpts().ArrowDepth) {
Diag(OpLoc, diag::err_operator_arrow_depth_exceeded)
<< StartingType << getLangOpts().ArrowDepth << Base->getSourceRange();
noteOperatorArrows(*this, OperatorArrows);
Diag(OpLoc, diag::note_operator_arrow_depth)
<< getLangOpts().ArrowDepth;
return ExprError();
}
Result = BuildOverloadedArrowExpr(
S, Base, OpLoc,
// When in a template specialization and on the first loop iteration,
// potentially give the default diagnostic (with the fixit in a
// separate note) instead of having the error reported back to here
// and giving a diagnostic with a fixit attached to the error itself.
(FirstIteration && CurFD && CurFD->isFunctionTemplateSpecialization())
? 0
: &NoArrowOperatorFound);
if (Result.isInvalid()) {
if (NoArrowOperatorFound) {
if (FirstIteration) {
Diag(OpLoc, diag::err_typecheck_member_reference_suggestion)
<< BaseType << 1 << Base->getSourceRange()
<< FixItHint::CreateReplacement(OpLoc, ".");
OpKind = tok::period;
break;
}
Diag(OpLoc, diag::err_typecheck_member_reference_arrow)
<< BaseType << Base->getSourceRange();
CallExpr *CE = dyn_cast<CallExpr>(Base);
if (Decl *CD = (CE ? CE->getCalleeDecl() : 0)) {
Diag(CD->getLocStart(),
diag::note_member_reference_arrow_from_operator_arrow);
}
}
return ExprError();
}
Base = Result.get();
if (CXXOperatorCallExpr *OpCall = dyn_cast<CXXOperatorCallExpr>(Base))
OperatorArrows.push_back(OpCall->getDirectCallee());
BaseType = Base->getType();
CanQualType CBaseType = Context.getCanonicalType(BaseType);
if (!CTypes.insert(CBaseType)) {
Diag(OpLoc, diag::err_operator_arrow_circular) << StartingType;
noteOperatorArrows(*this, OperatorArrows);
return ExprError();
}
FirstIteration = false;
}
if (OpKind == tok::arrow &&
(BaseType->isPointerType() || BaseType->isObjCObjectPointerType()))
BaseType = BaseType->getPointeeType();
}
// Objective-C properties allow "." access on Objective-C pointer types,
// so adjust the base type to the object type itself.
if (BaseType->isObjCObjectPointerType())
BaseType = BaseType->getPointeeType();
// C++ [basic.lookup.classref]p2:
// [...] If the type of the object expression is of pointer to scalar
// type, the unqualified-id is looked up in the context of the complete
// postfix-expression.
//
// This also indicates that we could be parsing a pseudo-destructor-name.
// Note that Objective-C class and object types can be pseudo-destructor
// expressions or normal member (ivar or property) access expressions.
if (BaseType->isObjCObjectOrInterfaceType()) {
MayBePseudoDestructor = true;
} else if (!BaseType->isRecordType()) {
ObjectType = ParsedType();
MayBePseudoDestructor = true;
return Owned(Base);
}
// The object type must be complete (or dependent), or
// C++11 [expr.prim.general]p3:
// Unlike the object expression in other contexts, *this is not required to
// be of complete type for purposes of class member access (5.2.5) outside
// the member function body.
if (!BaseType->isDependentType() &&
!isThisOutsideMemberFunctionBody(BaseType) &&
RequireCompleteType(OpLoc, BaseType, diag::err_incomplete_member_access))
return ExprError();
// C++ [basic.lookup.classref]p2:
// If the id-expression in a class member access (5.2.5) is an
// unqualified-id, and the type of the object expression is of a class
// type C (or of pointer to a class type C), the unqualified-id is looked
// up in the scope of class C. [...]
ObjectType = ParsedType::make(BaseType);
return Base;
}
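// Illustrative sketch (not part of this diff): the loop above keeps applying
// operator-> until a pointer type is reached (C++ [over.match.oper]p8), as in
// this hypothetical drill-down chain:
//   struct Obj { int field; };
//   struct Inner { Obj *operator->(); };
//   struct Outer { Inner operator->(); };
//   int read(Outer o) { return o->field; }  // Outer::operator->() yields
//                                           // Inner, whose operator->()
//                                           // yields Obj*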
ExprResult Sema::DiagnoseDtorReference(SourceLocation NameLoc,
Expr *MemExpr) {
SourceLocation ExpectedLParenLoc = PP.getLocForEndOfToken(NameLoc);
Diag(MemExpr->getLocStart(), diag::err_dtor_expr_without_call)
<< isa<CXXPseudoDestructorExpr>(MemExpr)
<< FixItHint::CreateInsertion(ExpectedLParenLoc, "()");
return ActOnCallExpr(/*Scope*/ 0,
MemExpr,
/*LPLoc*/ ExpectedLParenLoc,
None,
/*RPLoc*/ ExpectedLParenLoc);
}
static bool CheckArrow(Sema& S, QualType& ObjectType, Expr *&Base,
tok::TokenKind& OpKind, SourceLocation OpLoc) {
if (Base->hasPlaceholderType()) {
ExprResult result = S.CheckPlaceholderExpr(Base);
if (result.isInvalid()) return true;
Base = result.take();
}
ObjectType = Base->getType();
// C++ [expr.pseudo]p2:
// The left-hand side of the dot operator shall be of scalar type. The
// left-hand side of the arrow operator shall be of pointer to scalar type.
// This scalar type is the object type.
// Note that this is rather different from the normal handling for the
// arrow operator.
if (OpKind == tok::arrow) {
if (const PointerType *Ptr = ObjectType->getAs<PointerType>()) {
ObjectType = Ptr->getPointeeType();
} else if (!Base->isTypeDependent()) {
// The user wrote "p->" when she probably meant "p."; fix it.
S.Diag(OpLoc, diag::err_typecheck_member_reference_suggestion)
<< ObjectType << true
<< FixItHint::CreateReplacement(OpLoc, ".");
if (S.isSFINAEContext())
return true;
OpKind = tok::period;
}
}
return false;
}
ExprResult Sema::BuildPseudoDestructorExpr(Expr *Base,
SourceLocation OpLoc,
tok::TokenKind OpKind,
const CXXScopeSpec &SS,
TypeSourceInfo *ScopeTypeInfo,
SourceLocation CCLoc,
SourceLocation TildeLoc,
PseudoDestructorTypeStorage Destructed,
bool HasTrailingLParen) {
TypeSourceInfo *DestructedTypeInfo = Destructed.getTypeSourceInfo();
QualType ObjectType;
if (CheckArrow(*this, ObjectType, Base, OpKind, OpLoc))
return ExprError();
if (!ObjectType->isDependentType() && !ObjectType->isScalarType() &&
!ObjectType->isVectorType()) {
if (getLangOpts().MicrosoftMode && ObjectType->isVoidType())
Diag(OpLoc, diag::ext_pseudo_dtor_on_void) << Base->getSourceRange();
else
Diag(OpLoc, diag::err_pseudo_dtor_base_not_scalar)
<< ObjectType << Base->getSourceRange();
return ExprError();
}
// C++ [expr.pseudo]p2:
// [...] The cv-unqualified versions of the object type and of the type
// designated by the pseudo-destructor-name shall be the same type.
if (DestructedTypeInfo) {
QualType DestructedType = DestructedTypeInfo->getType();
SourceLocation DestructedTypeStart
= DestructedTypeInfo->getTypeLoc().getLocalSourceRange().getBegin();
if (!DestructedType->isDependentType() && !ObjectType->isDependentType()) {
if (!Context.hasSameUnqualifiedType(DestructedType, ObjectType)) {
Diag(DestructedTypeStart, diag::err_pseudo_dtor_type_mismatch)
<< ObjectType << DestructedType << Base->getSourceRange()
<< DestructedTypeInfo->getTypeLoc().getLocalSourceRange();
// Recover by setting the destructed type to the object type.
DestructedType = ObjectType;
DestructedTypeInfo = Context.getTrivialTypeSourceInfo(ObjectType,
DestructedTypeStart);
Destructed = PseudoDestructorTypeStorage(DestructedTypeInfo);
} else if (DestructedType.getObjCLifetime() !=
ObjectType.getObjCLifetime()) {
if (DestructedType.getObjCLifetime() == Qualifiers::OCL_None) {
// Okay: just pretend that the user provided the correctly-qualified
// type.
} else {
Diag(DestructedTypeStart, diag::err_arc_pseudo_dtor_inconstant_quals)
<< ObjectType << DestructedType << Base->getSourceRange()
<< DestructedTypeInfo->getTypeLoc().getLocalSourceRange();
}
// Recover by setting the destructed type to the object type.
DestructedType = ObjectType;
DestructedTypeInfo = Context.getTrivialTypeSourceInfo(ObjectType,
DestructedTypeStart);
Destructed = PseudoDestructorTypeStorage(DestructedTypeInfo);
}
}
}
// C++ [expr.pseudo]p2:
// [...] Furthermore, the two type-names in a pseudo-destructor-name of the
// form
//
// ::[opt] nested-name-specifier[opt] type-name :: ~ type-name
//
// shall designate the same scalar type.
if (ScopeTypeInfo) {
QualType ScopeType = ScopeTypeInfo->getType();
if (!ScopeType->isDependentType() && !ObjectType->isDependentType() &&
!Context.hasSameUnqualifiedType(ScopeType, ObjectType)) {
Diag(ScopeTypeInfo->getTypeLoc().getLocalSourceRange().getBegin(),
diag::err_pseudo_dtor_type_mismatch)
<< ObjectType << ScopeType << Base->getSourceRange()
<< ScopeTypeInfo->getTypeLoc().getLocalSourceRange();
ScopeType = QualType();
ScopeTypeInfo = 0;
}
}
Expr *Result
= new (Context) CXXPseudoDestructorExpr(Context, Base,
OpKind == tok::arrow, OpLoc,
SS.getWithLocInContext(Context),
ScopeTypeInfo,
CCLoc,
TildeLoc,
Destructed);
if (HasTrailingLParen)
return Owned(Result);
return DiagnoseDtorReference(Destructed.getLocation(), Result);
}
ExprResult Sema::ActOnPseudoDestructorExpr(Scope *S, Expr *Base,
SourceLocation OpLoc,
tok::TokenKind OpKind,
CXXScopeSpec &SS,
UnqualifiedId &FirstTypeName,
SourceLocation CCLoc,
SourceLocation TildeLoc,
UnqualifiedId &SecondTypeName,
bool HasTrailingLParen) {
assert((FirstTypeName.getKind() == UnqualifiedId::IK_TemplateId ||
FirstTypeName.getKind() == UnqualifiedId::IK_Identifier) &&
"Invalid first type name in pseudo-destructor");
assert((SecondTypeName.getKind() == UnqualifiedId::IK_TemplateId ||
SecondTypeName.getKind() == UnqualifiedId::IK_Identifier) &&
"Invalid second type name in pseudo-destructor");
QualType ObjectType;
if (CheckArrow(*this, ObjectType, Base, OpKind, OpLoc))
return ExprError();
// Compute the object type that we should use for name lookup purposes. Only
// record types and dependent types matter.
ParsedType ObjectTypePtrForLookup;
if (!SS.isSet()) {
if (ObjectType->isRecordType())
ObjectTypePtrForLookup = ParsedType::make(ObjectType);
else if (ObjectType->isDependentType())
ObjectTypePtrForLookup = ParsedType::make(Context.DependentTy);
}
// Convert the name of the type being destructed (following the ~) into a
// type (with source-location information).
QualType DestructedType;
TypeSourceInfo *DestructedTypeInfo = 0;
PseudoDestructorTypeStorage Destructed;
if (SecondTypeName.getKind() == UnqualifiedId::IK_Identifier) {
ParsedType T = getTypeName(*SecondTypeName.Identifier,
SecondTypeName.StartLocation,
S, &SS, true, false, ObjectTypePtrForLookup);
if (!T &&
((SS.isSet() && !computeDeclContext(SS, false)) ||
(!SS.isSet() && ObjectType->isDependentType()))) {
// The name of the type being destroyed is a dependent name, and we
// couldn't find anything useful in scope. Just store the identifier and
// its location, and we'll perform (qualified) name lookup again at
// template instantiation time.
Destructed = PseudoDestructorTypeStorage(SecondTypeName.Identifier,
SecondTypeName.StartLocation);
} else if (!T) {
Diag(SecondTypeName.StartLocation,
diag::err_pseudo_dtor_destructor_non_type)
<< SecondTypeName.Identifier << ObjectType;
if (isSFINAEContext())
return ExprError();
// Recover by assuming we had the right type all along.
DestructedType = ObjectType;
} else
DestructedType = GetTypeFromParser(T, &DestructedTypeInfo);
} else {
// Resolve the template-id to a type.
TemplateIdAnnotation *TemplateId = SecondTypeName.TemplateId;
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
TypeResult T = ActOnTemplateIdType(TemplateId->SS,
TemplateId->TemplateKWLoc,
TemplateId->Template,
TemplateId->TemplateNameLoc,
TemplateId->LAngleLoc,
TemplateArgsPtr,
TemplateId->RAngleLoc);
if (T.isInvalid() || !T.get()) {
// Recover by assuming we had the right type all along.
DestructedType = ObjectType;
} else
DestructedType = GetTypeFromParser(T.get(), &DestructedTypeInfo);
}
// If we've performed some kind of recovery, (re-)build the type source
// information.
if (!DestructedType.isNull()) {
if (!DestructedTypeInfo)
DestructedTypeInfo = Context.getTrivialTypeSourceInfo(DestructedType,
SecondTypeName.StartLocation);
Destructed = PseudoDestructorTypeStorage(DestructedTypeInfo);
}
// Convert the name of the scope type (the type prior to '::') into a type.
TypeSourceInfo *ScopeTypeInfo = 0;
QualType ScopeType;
if (FirstTypeName.getKind() == UnqualifiedId::IK_TemplateId ||
FirstTypeName.Identifier) {
if (FirstTypeName.getKind() == UnqualifiedId::IK_Identifier) {
ParsedType T = getTypeName(*FirstTypeName.Identifier,
FirstTypeName.StartLocation,
S, &SS, true, false, ObjectTypePtrForLookup);
if (!T) {
Diag(FirstTypeName.StartLocation,
diag::err_pseudo_dtor_destructor_non_type)
<< FirstTypeName.Identifier << ObjectType;
if (isSFINAEContext())
return ExprError();
// Just drop this type. It's unnecessary anyway.
ScopeType = QualType();
} else
ScopeType = GetTypeFromParser(T, &ScopeTypeInfo);
} else {
// Resolve the template-id to a type.
TemplateIdAnnotation *TemplateId = FirstTypeName.TemplateId;
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
TypeResult T = ActOnTemplateIdType(TemplateId->SS,
TemplateId->TemplateKWLoc,
TemplateId->Template,
TemplateId->TemplateNameLoc,
TemplateId->LAngleLoc,
TemplateArgsPtr,
TemplateId->RAngleLoc);
if (T.isInvalid() || !T.get()) {
// Recover by dropping this type.
ScopeType = QualType();
} else
ScopeType = GetTypeFromParser(T.get(), &ScopeTypeInfo);
}
}
if (!ScopeType.isNull() && !ScopeTypeInfo)
ScopeTypeInfo = Context.getTrivialTypeSourceInfo(ScopeType,
FirstTypeName.StartLocation);
return BuildPseudoDestructorExpr(Base, OpLoc, OpKind, SS,
ScopeTypeInfo, CCLoc, TildeLoc,
Destructed, HasTrailingLParen);
}
ExprResult Sema::ActOnPseudoDestructorExpr(Scope *S, Expr *Base,
SourceLocation OpLoc,
tok::TokenKind OpKind,
SourceLocation TildeLoc,
const DeclSpec& DS,
bool HasTrailingLParen) {
QualType ObjectType;
if (CheckArrow(*this, ObjectType, Base, OpKind, OpLoc))
return ExprError();
QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
TypeLocBuilder TLB;
DecltypeTypeLoc DecltypeTL = TLB.push<DecltypeTypeLoc>(T);
DecltypeTL.setNameLoc(DS.getTypeSpecTypeLoc());
TypeSourceInfo *DestructedTypeInfo = TLB.getTypeSourceInfo(Context, T);
PseudoDestructorTypeStorage Destructed(DestructedTypeInfo);
return BuildPseudoDestructorExpr(Base, OpLoc, OpKind, CXXScopeSpec(),
0, SourceLocation(), TildeLoc,
Destructed, HasTrailingLParen);
}
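// Illustrative sketch (not part of this diff): pseudo-destructor calls on a
// scalar type (C++ [expr.pseudo]). They have no effect, but allow generic
// code to "destroy" any T uniformly:
//   typedef int I;
//   void demo(int x, int *p) {
//     x.~I();   // dot form
//     p->~I();  // arrow form
//   }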
ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
CXXConversionDecl *Method,
bool HadMultipleCandidates) {
if (Method->getParent()->isLambda() &&
Method->getConversionType()->isBlockPointerType()) {
// This is a lambda conversion to block pointer; check if the argument
// is a LambdaExpr.
Expr *SubE = E;
CastExpr *CE = dyn_cast<CastExpr>(SubE);
if (CE && CE->getCastKind() == CK_NoOp)
SubE = CE->getSubExpr();
SubE = SubE->IgnoreParens();
if (CXXBindTemporaryExpr *BE = dyn_cast<CXXBindTemporaryExpr>(SubE))
SubE = BE->getSubExpr();
if (isa<LambdaExpr>(SubE)) {
// For the conversion to block pointer on a lambda expression, we
// construct a special BlockLiteral instead; this doesn't really make
// a difference in ARC, but outside of ARC the resulting block literal
// follows the normal lifetime rules for block literals instead of being
// autoreleased.
DiagnosticErrorTrap Trap(Diags);
ExprResult Exp = BuildBlockForLambdaConversion(E->getExprLoc(),
E->getExprLoc(),
Method, E);
if (Exp.isInvalid())
Diag(E->getExprLoc(), diag::note_lambda_to_block_conv);
return Exp;
}
}
ExprResult Exp = PerformObjectArgumentInitialization(E, /*Qualifier=*/0,
FoundDecl, Method);
if (Exp.isInvalid())
return true;
MemberExpr *ME =
new (Context) MemberExpr(Exp.take(), /*IsArrow=*/false, Method,
SourceLocation(), Context.BoundMemberTy,
VK_RValue, OK_Ordinary);
if (HadMultipleCandidates)
ME->setHadMultipleCandidates(true);
MarkMemberReferenced(ME);
QualType ResultType = Method->getResultType();
ExprValueKind VK = Expr::getValueKindForType(ResultType);
ResultType = ResultType.getNonLValueExprType(Context);
CXXMemberCallExpr *CE =
new (Context) CXXMemberCallExpr(Context, ME, None, ResultType, VK,
Exp.get()->getLocEnd());
return CE;
}
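// Illustrative sketch (not part of this diff): the kind of conversion-function
// call built above, for a hypothetical class with a conversion operator:
//   struct Meters { operator double() const; };
//   double demo(Meters m) { return m; }  // implicit call to
//                                        // Meters::operator double()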
ExprResult Sema::BuildCXXNoexceptExpr(SourceLocation KeyLoc, Expr *Operand,
SourceLocation RParen) {
CanThrowResult CanThrow = canThrow(Operand);
return Owned(new (Context) CXXNoexceptExpr(Context.BoolTy, Operand,
CanThrow, KeyLoc, RParen));
}
ExprResult Sema::ActOnNoexceptExpr(SourceLocation KeyLoc, SourceLocation,
Expr *Operand, SourceLocation RParen) {
return BuildCXXNoexceptExpr(KeyLoc, Operand, RParen);
}
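// Illustrative sketch (not part of this diff): the noexcept operator yields a
// bool constant describing whether its unevaluated operand can throw:
//   void may_throw();
//   void no_throw() noexcept;
//   static_assert(!noexcept(may_throw()), "may_throw can throw");
//   static_assert(noexcept(no_throw()), "no_throw cannot throw");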
static bool IsSpecialDiscardedValue(Expr *E) {
// In C++11, discarded-value expressions of a certain form are special,
// according to [expr]p10:
// The lvalue-to-rvalue conversion (4.1) is applied only if the
// expression is an lvalue of volatile-qualified type and it has
// one of the following forms:
E = E->IgnoreParens();
// - id-expression (5.1.1),
if (isa<DeclRefExpr>(E))
return true;
// - subscripting (5.2.1),
if (isa<ArraySubscriptExpr>(E))
return true;
// - class member access (5.2.5),
if (isa<MemberExpr>(E))
return true;
// - indirection (5.3.1),
if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E))
if (UO->getOpcode() == UO_Deref)
return true;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
// - pointer-to-member operation (5.5),
if (BO->isPtrMemOp())
return true;
// - comma expression (5.18) where the right operand is one of the above.
if (BO->getOpcode() == BO_Comma)
return IsSpecialDiscardedValue(BO->getRHS());
}
// - conditional expression (5.16) where both the second and the third
// operands are one of the above, or
if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E))
return IsSpecialDiscardedValue(CO->getTrueExpr()) &&
IsSpecialDiscardedValue(CO->getFalseExpr());
// The related edge case of "*x ?: *x".
if (BinaryConditionalOperator *BCO =
dyn_cast<BinaryConditionalOperator>(E)) {
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(BCO->getTrueExpr()))
return IsSpecialDiscardedValue(OVE->getSourceExpr()) &&
IsSpecialDiscardedValue(BCO->getFalseExpr());
}
// Objective-C++ extensions to the rule.
if (isa<PseudoObjectExpr>(E) || isa<ObjCIvarRefExpr>(E))
return true;
return false;
}
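// Illustrative sketch (not part of this diff): a discarded volatile lvalue of
// one of the special forms above still performs a load in C++11:
//   void demo(volatile int *p, volatile int v) {
//     *p;  // indirection: lvalue-to-rvalue conversion applied (a read)
//     v;   // id-expression: likewise a read
//   }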
/// Perform the conversions required for an expression used in a
/// context that ignores the result.
ExprResult Sema::IgnoredValueConversions(Expr *E) {
if (E->hasPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(E);
if (result.isInvalid()) return Owned(E);
E = result.take();
}
// C99 6.3.2.1:
// [Except in specific positions,] an lvalue that does not have
// array type is converted to the value stored in the
// designated object (and is no longer an lvalue).
if (E->isRValue()) {
// In C, function designators (i.e. expressions of function type)
// are r-values, but we still want to do function-to-pointer decay
// on them. This is both technically correct and convenient for
// some clients.
if (!getLangOpts().CPlusPlus && E->getType()->isFunctionType())
return DefaultFunctionArrayConversion(E);
return Owned(E);
}
if (getLangOpts().CPlusPlus) {
// The C++11 standard defines the notion of a discarded-value expression;
// normally, we don't need to do anything to handle it, but if it is a
// volatile lvalue with a special form, we perform an lvalue-to-rvalue
// conversion.
if (getLangOpts().CPlusPlus11 && E->isGLValue() &&
E->getType().isVolatileQualified() &&
IsSpecialDiscardedValue(E)) {
ExprResult Res = DefaultLvalueConversion(E);
if (Res.isInvalid())
return Owned(E);
E = Res.take();
}
return Owned(E);
}
// GCC seems to also exclude expressions of incomplete enum type.
if (const EnumType *T = E->getType()->getAs<EnumType>()) {
if (!T->getDecl()->isComplete()) {
// FIXME: stupid workaround for a codegen bug!
E = ImpCastExprToType(E, Context.VoidTy, CK_ToVoid).take();
return Owned(E);
}
}
ExprResult Res = DefaultFunctionArrayLvalueConversion(E);
if (Res.isInvalid())
return Owned(E);
E = Res.take();
if (!E->getType()->isVoidType())
RequireCompleteType(E->getExprLoc(), E->getType(),
diag::err_incomplete_type);
return Owned(E);
}
// If we can unambiguously determine whether Var can never be used
// in a constant expression, return true.
// - if the variable and its initializer are non-dependent, then
// we can unambiguously check if the variable is a constant expression.
// - if the initializer is not value-dependent, we can determine whether
// it can be used to initialize a constant expression. If Init cannot
// be used to initialize a constant expression, we conclude that Var can
// never be a constant expression.
// - FIXME: if the initializer is dependent, we can still do some analysis and
// identify certain cases unambiguously as non-const by using a Visitor:
// - such as those that involve odr-use of a ParmVarDecl, or involve a
// new-expression, delete-expression, lambda-expr, dynamic_cast,
// reinterpret_cast, etc.
static inline bool VariableCanNeverBeAConstantExpression(VarDecl *Var,
ASTContext &Context) {
if (isa<ParmVarDecl>(Var)) return true;
const VarDecl *DefVD = 0;
// If there is no initializer - this can not be a constant expression.
if (!Var->getAnyInitializer(DefVD)) return true;
assert(DefVD);
if (DefVD->isWeak()) return false;
EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt();
-
+
Expr *Init = cast<Expr>(Eval->Value);
if (Var->getType()->isDependentType() || Init->isValueDependent()) {
- if (!Init->isValueDependent())
- return !DefVD->checkInitIsICE();
- // FIXME: We might still be able to do some analysis of Init here
- // to conclude that even in a dependent setting, Init can never
- // be a constexpr - but for now admit agnosticity.
+ // FIXME: Teach the constant evaluator to deal with the non-dependent parts
+ // of value-dependent expressions, and use it here to determine whether the
+ // initializer is a potential constant expression.
return false;
- }
+ }
+
return !IsVariableAConstantExpression(Var, Context);
}
/// \brief Check if the current lambda scope has any potential captures, and
/// whether they can be captured by any of the enclosing lambdas that are
/// ready to capture. If there is a lambda that can capture a nested
/// potential-capture, go ahead and do so. Also, check to see if any
/// variables are uncapturable or do not involve an odr-use and so do not
/// need to be captured.
static void CheckLambdaCaptures(Expr *const FE,
LambdaScopeInfo *const CurrentLSI, Sema &S) {
assert(!S.isUnevaluatedContext());
assert(S.CurContext->isDependentContext());
const bool IsFullExprInstantiationDependent =
FE->isInstantiationDependent();
// All the potentially capturable variables in the current nested
// lambda (within a generic outer lambda) must be captured by an
// outer lambda that is enclosed within a non-dependent context.
for (size_t I = 0, N = CurrentLSI->getNumPotentialVariableCaptures();
I != N; ++I) {
Expr *VarExpr = 0;
VarDecl *Var = 0;
CurrentLSI->getPotentialVariableCapture(I, Var, VarExpr);
//
if (CurrentLSI->isVariableExprMarkedAsNonODRUsed(VarExpr) &&
!IsFullExprInstantiationDependent)
continue;
// Climb up until we find a lambda that can capture:
// - a generic-or-non-generic lambda call operator that is enclosed
// within a non-dependent context.
unsigned FunctionScopeIndexOfCapturableLambda = 0;
if (GetInnermostEnclosingCapturableLambda(
S.FunctionScopes, FunctionScopeIndexOfCapturableLambda,
S.CurContext, Var, S)) {
MarkVarDeclODRUsed(Var, VarExpr->getExprLoc(),
S, &FunctionScopeIndexOfCapturableLambda);
}
const bool IsVarNeverAConstantExpression =
VariableCanNeverBeAConstantExpression(Var, S.Context);
if (!IsFullExprInstantiationDependent || IsVarNeverAConstantExpression) {
// This full expression is not instantiation dependent, or the variable
// cannot be used in a constant expression - which means
// this variable must be odr-used here, so diagnose a
// capture violation early if the variable is uncapturable.
// This is purely for diagnosing errors early. Otherwise, this
// error would get diagnosed when the lambda becomes capture ready.
QualType CaptureType, DeclRefType;
SourceLocation ExprLoc = VarExpr->getExprLoc();
if (S.tryCaptureVariable(Var, ExprLoc, S.TryCapture_Implicit,
/*EllipsisLoc*/ SourceLocation(),
/*BuildAndDiagnose*/false, CaptureType,
DeclRefType, 0)) {
// We will never be able to capture this variable, and we need
// to be able to in any and all instantiations, so diagnose it.
S.tryCaptureVariable(Var, ExprLoc, S.TryCapture_Implicit,
/*EllipsisLoc*/ SourceLocation(),
/*BuildAndDiagnose*/true, CaptureType,
DeclRefType, 0);
}
}
}
if (CurrentLSI->hasPotentialThisCapture()) {
unsigned FunctionScopeIndexOfCapturableLambda = 0;
if (GetInnermostEnclosingCapturableLambda(
S.FunctionScopes, FunctionScopeIndexOfCapturableLambda,
S.CurContext, /*0 is 'this'*/ 0, S)) {
S.CheckCXXThisCapture(CurrentLSI->PotentialThisCaptureLocation,
/*Explicit*/false, /*BuildAndDiagnose*/true,
&FunctionScopeIndexOfCapturableLambda);
}
}
CurrentLSI->clearPotentialCaptures();
}
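// Illustrative sketch (not part of this diff): a nested generic lambda whose
// body forces every enclosing lambda to capture a local, which is what
// CheckLambdaCaptures resolves once the full-expression is complete:
//   void demo() {
//     int x = 10;             // not usable in a constant expression
//     auto L = [=](auto a) {  // x must be captured by L...
//       auto M = [=](auto b) { return x + a + b; };  // ...and by M
//       return M(1);
//     };
//   }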
ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC,
bool DiscardedValue,
bool IsConstexpr,
bool IsLambdaInitCaptureInitializer) {
ExprResult FullExpr = Owned(FE);
if (!FullExpr.get())
return ExprError();
// If we are an init-expression in a lambda's init-capture, we should not
// diagnose an unexpanded pack now (it will be diagnosed once the
// full-expression containing the lambda-expr is done).
// template<class ... Ts> void test(Ts ... t) {
// test([&a(t)]() { <-- (t) is an init-expr that shouldn't be diagnosed now.
// return a;
// }() ...);
// }
// FIXME: This is a hack. It would be better if we pushed the lambda scope
// when we parse the lambda introducer, and teach capturing (but not
// unexpanded pack detection) to walk over LambdaScopeInfos which don't have a
// corresponding class yet (that is, have LambdaScopeInfo either represent a
// lambda where we've entered the introducer but not the body, or represent a
// lambda where we've entered the body, depending on where the
// parser/instantiation has got to).
if (!IsLambdaInitCaptureInitializer &&
DiagnoseUnexpandedParameterPack(FullExpr.get()))
return ExprError();
// Top-level expressions default to 'id' when we're in a debugger.
if (DiscardedValue && getLangOpts().DebuggerCastResultToId &&
FullExpr.get()->getType() == Context.UnknownAnyTy) {
FullExpr = forceUnknownAnyToType(FullExpr.take(), Context.getObjCIdType());
if (FullExpr.isInvalid())
return ExprError();
}
if (DiscardedValue) {
FullExpr = CheckPlaceholderExpr(FullExpr.take());
if (FullExpr.isInvalid())
return ExprError();
FullExpr = IgnoredValueConversions(FullExpr.take());
if (FullExpr.isInvalid())
return ExprError();
}
CheckCompletedExpr(FullExpr.get(), CC, IsConstexpr);
// At the end of this full expression (which could be a deeply nested
// lambda), if there is a potential capture within the nested lambda,
// have the outer capture-able lambda try and capture it.
// Consider the following code:
// void f(int, int);
// void f(const int&, double);
// void foo() {
// const int x = 10, y = 20;
// auto L = [=](auto a) {
// auto M = [=](auto b) {
// f(x, b); <-- requires x to be captured by L and M
// f(y, a); <-- requires y to be captured by L, but not all Ms
// };
// };
// }
// FIXME: Also consider what happens for something like this that involves
// the gnu-extension statement-expressions or even lambda-init-captures:
// void f() {
// const int n = 0;
// auto L = [&](auto a) {
// +n + ({ 0; a; });
// };
// }
//
// Here, we see +n, and then the full-expression 0; ends, so we don't
// capture n (and instead remove it from our list of potential captures),
// and then the full-expression +n + ({ 0; }); ends, but it's too late
// for us to see that we need to capture n after all.
LambdaScopeInfo *const CurrentLSI = getCurLambda();
// FIXME: PR 17877 showed that getCurLambda() can return a valid pointer
// even if CurContext is not a lambda call operator. Refer to that Bug Report
// for an example of the code that might cause this discrepancy.
// By ensuring we are in the context of a lambda's call operator
// we can fix the bug (we only need to check whether we need to capture
// if we are within a lambda's body); but per the comments in that
// PR, a proper fix would entail:
// "Alternative suggestion:
// - Add to Sema an integer holding the smallest (outermost) scope
// index that we are *lexically* within, and save/restore/set to
// FunctionScopes.size() in InstantiatingTemplate's
// constructor/destructor.
// - Teach the handful of places that iterate over FunctionScopes to
// stop at the outermost enclosing lexical scope."
const bool IsInLambdaDeclContext = isLambdaCallOperator(CurContext);
if (IsInLambdaDeclContext && CurrentLSI &&
CurrentLSI->hasPotentialCaptures() && !FullExpr.isInvalid())
CheckLambdaCaptures(FE, CurrentLSI, *this);
return MaybeCreateExprWithCleanups(FullExpr);
}
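// A minimal, self-contained sketch of the capture propagation resolved by
// CheckLambdaCaptures above (illustrative only, not part of this file;
// assumes a C++14 compiler for the generic lambdas). Whether a use forces a
// capture depends on overload resolution: binding the const int to a
// reference is an odr-use, merely reading its constant value is not.
//
//   #include <cstdio>
//
//   void f(int, int)            { std::puts("f(int, int)"); }
//   void f(const int &, double) { std::puts("f(const int&, double)"); }
//
//   int main() {
//     const int x = 10, y = 20;
//     auto L = [=](auto a) {
//       auto M = [=](auto b) {
//         f(x, b); // b = double picks f(const int&, double): x is odr-used
//                  // and must be captured by both M and L.
//         f(y, a); // a = int picks f(int, int): only y's constant value is
//                  // read, so this instantiation of M need not capture y.
//       };
//       M(0.5);
//     };
//     L(1);    // a = int
//     L(0.25); // a = double: f(y, a) now odr-uses y, so L must capture it
//     return 0;
//   }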
StmtResult Sema::ActOnFinishFullStmt(Stmt *FullStmt) {
if (!FullStmt) return StmtError();
return MaybeCreateStmtWithCleanups(FullStmt);
}
Sema::IfExistsResult
Sema::CheckMicrosoftIfExistsSymbol(Scope *S,
CXXScopeSpec &SS,
const DeclarationNameInfo &TargetNameInfo) {
DeclarationName TargetName = TargetNameInfo.getName();
if (!TargetName)
return IER_DoesNotExist;
// If the name itself is dependent, then the result is dependent.
if (TargetName.isDependentName())
return IER_Dependent;
// Do the redeclaration lookup in the current scope.
LookupResult R(*this, TargetNameInfo, Sema::LookupAnyName,
Sema::NotForRedeclaration);
LookupParsedName(R, S, &SS);
R.suppressDiagnostics();
switch (R.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
case LookupResult::Ambiguous:
return IER_Exists;
case LookupResult::NotFound:
return IER_DoesNotExist;
case LookupResult::NotFoundInCurrentInstantiation:
return IER_Dependent;
}
llvm_unreachable("Invalid LookupResult Kind!");
}
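// A hedged usage sketch of the Microsoft extension this lookup serves
// (illustrative only; clang accepts it under -fms-extensions). Exactly one
// of the two guarded blocks survives compilation, depending on whether the
// looked-up name exists:
//
//   struct S { void f(); };
//
//   int probe() {
//     int found = 0;
//     __if_exists(S::f)     { found |= 1; } // S::f exists: block is kept
//     __if_not_exists(S::g) { found |= 2; } // S::g is absent: block is kept
//     return found; // 3
//   }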
Sema::IfExistsResult
Sema::CheckMicrosoftIfExistsSymbol(Scope *S, SourceLocation KeywordLoc,
bool IsIfExists, CXXScopeSpec &SS,
UnqualifiedId &Name) {
DeclarationNameInfo TargetNameInfo = GetNameFromUnqualifiedId(Name);
// Check for unexpanded parameter packs.
SmallVector<UnexpandedParameterPack, 4> Unexpanded;
collectUnexpandedParameterPacks(SS, Unexpanded);
collectUnexpandedParameterPacks(TargetNameInfo, Unexpanded);
if (!Unexpanded.empty()) {
DiagnoseUnexpandedParameterPacks(KeywordLoc,
IsIfExists? UPPC_IfExists
: UPPC_IfNotExists,
Unexpanded);
return IER_Error;
}
return CheckMicrosoftIfExistsSymbol(S, SS, TargetNameInfo);
}
Index: head/contrib/llvm/tools/clang
===================================================================
--- head/contrib/llvm/tools/clang (revision 265924)
+++ head/contrib/llvm/tools/clang (revision 265925)
Property changes on: head/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/clang/dist:r260150-265892
Index: head/contrib/llvm
===================================================================
--- head/contrib/llvm (revision 265924)
+++ head/contrib/llvm (revision 265925)
Property changes on: head/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/llvm/dist:r260150-265892
Index: head/etc/mtree/BSD.include.dist
===================================================================
--- head/etc/mtree/BSD.include.dist (revision 265924)
+++ head/etc/mtree/BSD.include.dist (revision 265925)
@@ -1,342 +1,342 @@
# $FreeBSD$
#
# Please see the file src/etc/mtree/README before making changes to this file.
#
/set type=dir uname=root gname=wheel mode=0755
.
altq
..
arpa
..
bsm
..
bsnmp
..
c++
4.2
backward
..
bits
..
debug
..
ext
pb_ds
detail
basic_tree_policy
..
bin_search_tree_
..
binary_heap_
..
binomial_heap_
..
binomial_heap_base_
..
cc_hash_table_map_
..
eq_fn
..
gp_hash_table_map_
..
hash_fn
..
left_child_next_sibling_heap_
..
list_update_map_
..
list_update_policy
..
ov_tree_map_
..
pairing_heap_
..
pat_trie_
..
rb_tree_map_
..
rc_binomial_heap_
..
resize_policy
..
splay_tree_
..
thin_heap_
..
tree_policy
..
trie_policy
..
unordered_iterator
..
..
..
..
tr1
..
..
v1
experimental
..
ext
..
tr1
..
..
..
cam
ata
..
scsi
..
..
clang
- 3.4
+ 3.4.1
..
..
crypto
..
dev
acpica
..
agp
..
an
..
bktr
..
ciss
..
filemon
..
firewire
..
hwpmc
..
ic
..
ieee488
..
iicbus
..
io
..
lmc
..
mfi
..
mpt
mpilib
..
..
nand
..
nvme
..
ofw
..
pbio
..
pci
..
powermac_nvram
..
ppbus
..
smbus
..
speaker
..
usb
..
utopia
..
vkbd
..
wi
..
..
edit
readline
..
..
fs
devfs
..
fdescfs
..
msdosfs
..
nandfs
..
nfs
..
nullfs
..
procfs
..
smbfs
..
udf
..
unionfs
..
..
gcc
4.2
..
..
geom
cache
..
concat
..
eli
..
gate
..
journal
..
label
..
mirror
..
mountver
..
multipath
..
nop
..
raid
..
raid3
..
shsec
..
stripe
..
virstor
..
..
gnu
posix
..
..
gpib
..
gssapi
..
infiniband
complib
..
iba
..
opensm
..
vendor
..
..
isofs
cd9660
..
..
kadm5
..
krb5
..
libmilter
..
lzma
..
machine
pc
..
..
net
..
net80211
..
netgraph
atm
..
bluetooth
include
..
..
netflow
..
..
netinet
..
netinet6
..
netipsec
..
netnatm
api
..
msg
..
saal
..
sig
..
..
netpfil
pf
..
..
netsmb
..
nfs
..
nfsclient
..
nfsserver
..
openssl
..
pcap
..
protocols
..
rdma
..
readline
..
rpc
..
rpcsvc
..
security
audit
..
mac_biba
..
mac_bsdextended
..
mac_lomac
..
mac_mls
..
mac_partition
..
..
ssp
..
sys
..
teken
..
ufs
ffs
..
ufs
..
..
vm
..
xlocale
..
..
Index: head/lib/clang/include/Makefile
===================================================================
--- head/lib/clang/include/Makefile (revision 265924)
+++ head/lib/clang/include/Makefile (revision 265925)
@@ -1,50 +1,50 @@
# $FreeBSD$
.include <bsd.own.mk>
LLVM_SRCS= ${.CURDIR}/../../../contrib/llvm
.include "../clang.build.mk"
.PATH: ${LLVM_SRCS}/tools/clang/lib/Headers
-INCSDIR=${INCLUDEDIR}/clang/3.4
+INCSDIR=${INCLUDEDIR}/clang/3.4.1
INCS= __wmmintrin_aes.h \
__wmmintrin_pclmul.h \
altivec.h \
ammintrin.h \
avx2intrin.h \
avxintrin.h \
bmi2intrin.h \
bmiintrin.h \
cpuid.h \
emmintrin.h \
f16cintrin.h \
fma4intrin.h \
fmaintrin.h \
immintrin.h \
lzcntintrin.h \
mm3dnow.h \
mm_malloc.h \
mmintrin.h \
module.map \
nmmintrin.h \
pmmintrin.h \
popcntintrin.h \
prfchwintrin.h \
rdseedintrin.h \
rtmintrin.h \
shaintrin.h \
smmintrin.h \
tbmintrin.h \
tmmintrin.h \
wmmintrin.h \
x86intrin.h \
xmmintrin.h \
xopintrin.h \
${GENINCS}
GENINCS= arm_neon.h
CLEANFILES= ${GENINCS}
.include <bsd.prog.mk>
Index: head/lib/clang/include/clang/Basic/Version.inc
===================================================================
--- head/lib/clang/include/clang/Basic/Version.inc (revision 265924)
+++ head/lib/clang/include/clang/Basic/Version.inc (revision 265925)
@@ -1,10 +1,11 @@
/* $FreeBSD$ */
-#define CLANG_VERSION 3.4
-#define CLANG_VERSION_MAJOR 3
-#define CLANG_VERSION_MINOR 4
+#define CLANG_VERSION 3.4.1
+#define CLANG_VERSION_MAJOR 3
+#define CLANG_VERSION_MINOR 4
+#define CLANG_VERSION_PATCHLEVEL 1
-#define CLANG_VENDOR "FreeBSD "
-#define CLANG_VENDOR_SUFFIX " 20140216"
+#define CLANG_VENDOR "FreeBSD "
+#define CLANG_VENDOR_SUFFIX " 20140512"
-#define SVN_REVISION "197956"
+#define SVN_REVISION "208032"
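(Note that CLANG_VERSION above expands to the bare pp-number 3.4.1, not a
string literal; a hedged sketch of the usual two-level stringization a
consumer would use to turn it into one -- STR, STR2 and clang_full_version
are hypothetical names, not part of the tree:)
/* Illustrative only. */
#define STR2(x) #x
#define STR(x)  STR2(x)
static const char clang_full_version[] =
    CLANG_VENDOR "clang version " STR(CLANG_VERSION) CLANG_VENDOR_SUFFIX;
/* "FreeBSD clang version 3.4.1 20140512" */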
Index: head/lib/clang/include/llvm/Config/config.h
===================================================================
--- head/lib/clang/include/llvm/Config/config.h (revision 265924)
+++ head/lib/clang/include/llvm/Config/config.h (revision 265925)
@@ -1,734 +1,719 @@
/* $FreeBSD$ */
/* include/llvm/Config/config.h. Generated from config.h.in by configure. */
/* include/llvm/Config/config.h.in. Generated from autoconf/configure.ac by autoheader. */
#ifndef CONFIG_H
#define CONFIG_H
/* Get __FreeBSD_version. */
#include <osreldate.h>
-/* Define if building universal (internal helper macro) */
-/* #undef AC_APPLE_UNIVERSAL_BUILD */
-
/* Bug report URL. */
#define BUG_REPORT_URL "http://llvm.org/bugs/"
/* Define if we have libxml2 */
/* #undef CLANG_HAVE_LIBXML */
/* Relative directory for resource files */
#define CLANG_RESOURCE_DIR ""
/* Directories clang will search for headers */
#define C_INCLUDE_DIRS ""
/* Default <path> to all compiler invocations for --sysroot=<path>. */
/* #undef DEFAULT_SYSROOT */
/* Define if you want backtraces on crash */
#define ENABLE_BACKTRACES 1
/* Define to enable crash handling overrides */
#define ENABLE_CRASH_OVERRIDES 1
/* Define if position independent code is enabled */
#define ENABLE_PIC 0
/* Define if timestamp information (e.g., __DATE__) is allowed */
#define ENABLE_TIMESTAMPS 0
/* Directory where gcc is installed. */
#define GCC_INSTALL_PREFIX ""
/* Define to 1 if you have the `arc4random' function. */
#define HAVE_ARC4RANDOM 1
/* Define to 1 if you have the `argz_append' function. */
/* #undef HAVE_ARGZ_APPEND */
/* Define to 1 if you have the `argz_create_sep' function. */
/* #undef HAVE_ARGZ_CREATE_SEP */
/* Define to 1 if you have the <argz.h> header file. */
/* #undef HAVE_ARGZ_H */
/* Define to 1 if you have the `argz_insert' function. */
/* #undef HAVE_ARGZ_INSERT */
/* Define to 1 if you have the `argz_next' function. */
/* #undef HAVE_ARGZ_NEXT */
/* Define to 1 if you have the `argz_stringify' function. */
/* #undef HAVE_ARGZ_STRINGIFY */
/* Define to 1 if you have the `backtrace' function. */
/* #undef HAVE_BACKTRACE */
/* Define to 1 if you have the `ceilf' function. */
#define HAVE_CEILF 1
/* Define if the circo program is available */
/* #undef HAVE_CIRCO */
/* Define to 1 if you have the `closedir' function. */
#define HAVE_CLOSEDIR 1
/* Define to 1 if you have the <CrashReporterClient.h> header file. */
/* #undef HAVE_CRASHREPORTERCLIENT_H */
/* can use __crashreporter_info__ */
#define HAVE_CRASHREPORTER_INFO 0
/* Define to 1 if you have the <cxxabi.h> header file. */
#define HAVE_CXXABI_H 1
/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you
don't. */
#define HAVE_DECL_FE_ALL_EXCEPT 1
/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you
don't. */
#define HAVE_DECL_FE_INEXACT 1
/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
don't. */
#define HAVE_DECL_STRERROR_S 0
/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
*/
#define HAVE_DIRENT_H 1
/* Define if you have the GNU dld library. */
/* #undef HAVE_DLD */
/* Define to 1 if you have the `dlerror' function. */
#define HAVE_DLERROR 1
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define if dlopen() is available on this platform. */
#define HAVE_DLOPEN 1
/* Define if the dot program is available */
/* #undef HAVE_DOT */
/* Define if the dotty program is available */
/* #undef HAVE_DOTTY */
/* Define if you have the _dyld_func_lookup function. */
/* #undef HAVE_DYLD */
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H 1
/* Define to 1 if the system has the type `error_t'. */
/* #undef HAVE_ERROR_T */
/* Define to 1 if you have the <execinfo.h> header file. */
/* #undef HAVE_EXECINFO_H */
/* Define to 1 if you have the `exp' function. */
#define HAVE_EXP 1
/* Define to 1 if you have the `exp2' function. */
#define HAVE_EXP2 1
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if the fdp program is available */
/* #undef HAVE_FDP */
/* Define to 1 if you have the <fenv.h> header file. */
#define HAVE_FENV_H 1
/* Define if libffi is available on this platform. */
/* #undef HAVE_FFI_CALL */
/* Define to 1 if you have the <ffi/ffi.h> header file. */
/* #undef HAVE_FFI_FFI_H */
/* Define to 1 if you have the <ffi.h> header file. */
/* #undef HAVE_FFI_H */
/* Set to 1 if the finite function is found in <ieeefp.h> */
/* #undef HAVE_FINITE_IN_IEEEFP_H */
/* Define to 1 if you have the `floorf' function. */
#define HAVE_FLOORF 1
/* Define to 1 if you have the `fmodf' function. */
#define HAVE_FMODF 1
/* Define to 1 if you have the `futimens' function. */
/* #undef HAVE_FUTIMENS */
/* Define to 1 if you have the `futimes' function. */
#define HAVE_FUTIMES 1
/* Define to 1 if you have the `getcwd' function. */
#define HAVE_GETCWD 1
/* Define to 1 if you have the `getpagesize' function. */
#define HAVE_GETPAGESIZE 1
/* Define to 1 if you have the `getrlimit' function. */
#define HAVE_GETRLIMIT 1
/* Define to 1 if you have the `getrusage' function. */
#define HAVE_GETRUSAGE 1
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
/* Define if the Graphviz program is available */
/* #undef HAVE_GRAPHVIZ */
/* Define if the gv program is available */
/* #undef HAVE_GV */
/* Define to 1 if the system has the type `int64_t'. */
#define HAVE_INT64_T 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `isatty' function. */
#define HAVE_ISATTY 1
/* Set to 1 if the isinf function is found in <cmath> */
#define HAVE_ISINF_IN_CMATH 1
/* Set to 1 if the isinf function is found in <math.h> */
#define HAVE_ISINF_IN_MATH_H 1
/* Set to 1 if the isnan function is found in <cmath> */
#define HAVE_ISNAN_IN_CMATH 1
/* Set to 1 if the isnan function is found in <math.h> */
#define HAVE_ISNAN_IN_MATH_H 1
/* Define if you have the libdl library or equivalent. */
#define HAVE_LIBDL 1
/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
/* #undef HAVE_LIBIMAGEHLP */
/* Define to 1 if you have the `m' library (-lm). */
#define HAVE_LIBM 1
/* Define to 1 if you have the `psapi' library (-lpsapi). */
/* #undef HAVE_LIBPSAPI */
/* Define to 1 if you have the `pthread' library (-lpthread). */
/* #undef HAVE_LIBPTHREAD */
/* Define to 1 if you have the `shell32' library (-lshell32). */
/* #undef HAVE_LIBSHELL32 */
/* Define to 1 if you have the `udis86' library (-ludis86). */
/* #undef HAVE_LIBUDIS86 */
/* Define to 1 if you have the `z' library (-lz). */
#define HAVE_LIBZ 1
/* Define if you can use -rdynamic. */
#define HAVE_LINK_EXPORT_DYNAMIC 1
/* Define to 1 if you have the <link.h> header file. */
#define HAVE_LINK_H 1
/* Define if you can use -Wl,-R. to pass -R. to the linker, in order to add
the current directory to the dynamic linker search path. */
#define HAVE_LINK_R 1
/* Define to 1 if you have the `log' function. */
#define HAVE_LOG 1
/* Define to 1 if you have the `log10' function. */
#define HAVE_LOG10 1
/* Define to 1 if you have the `log2' function. */
#if __FreeBSD_version >= 900027 || (__FreeBSD_version < 900000 && __FreeBSD_version >= 802502)
#define HAVE_LOG2 1
#endif
/* Define to 1 if you have the `longjmp' function. */
#define HAVE_LONGJMP 1
/* Define to 1 if you have the <mach/mach.h> header file. */
/* #undef HAVE_MACH_MACH_H */
/* Define to 1 if you have the <mach-o/dyld.h> header file. */
/* #undef HAVE_MACH_O_DYLD_H */
/* Define if mallinfo() is available on this platform. */
/* #undef HAVE_MALLINFO */
/* Define to 1 if you have the <malloc.h> header file. */
/* #undef HAVE_MALLOC_H */
/* Define to 1 if you have the <malloc/malloc.h> header file. */
/* #undef HAVE_MALLOC_MALLOC_H */
/* Define to 1 if you have the `malloc_zone_statistics' function. */
/* #undef HAVE_MALLOC_ZONE_STATISTICS */
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `mkdtemp' function. */
#define HAVE_MKDTEMP 1
/* Define to 1 if you have the `mkstemp' function. */
#define HAVE_MKSTEMP 1
/* Define to 1 if you have the `mktemp' function. */
#define HAVE_MKTEMP 1
/* Define to 1 if you have a working `mmap' system call. */
#define HAVE_MMAP 1
/* Define if mmap() uses MAP_ANONYMOUS to map anonymous pages, or undefine if
it uses MAP_ANON */
/* #undef HAVE_MMAP_ANONYMOUS */
/* Define if mmap() can map files into memory */
#define HAVE_MMAP_FILE
/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
/* #undef HAVE_NDIR_H */
/* Define to 1 if you have the `nearbyintf' function. */
#define HAVE_NEARBYINTF 1
/* Define if the neato program is available */
/* #undef HAVE_NEATO */
/* Define to 1 if you have the `opendir' function. */
#define HAVE_OPENDIR 1
/* Define to 1 if you have the `posix_spawn' function. */
/* #undef HAVE_POSIX_SPAWN */
/* Define to 1 if you have the `powf' function. */
#define HAVE_POWF 1
/* Define to 1 if you have the `pread' function. */
#define HAVE_PREAD 1
/* Define if libtool can extract symbol lists from object files. */
#define HAVE_PRELOADED_SYMBOLS 1
/* Define to have the %a format string */
#define HAVE_PRINTF_A 1
/* Have pthread_getspecific */
/* #undef HAVE_PTHREAD_GETSPECIFIC */
/* Define to 1 if you have the <pthread.h> header file. */
/* #undef HAVE_PTHREAD_H */
/* Have pthread_mutex_lock */
/* #undef HAVE_PTHREAD_MUTEX_LOCK */
/* Have pthread_rwlock_init */
/* #undef HAVE_PTHREAD_RWLOCK_INIT */
/* Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h> */
#define HAVE_RAND48 1
/* Define to 1 if you have the `readdir' function. */
#define HAVE_READDIR 1
/* Define to 1 if you have the `realpath' function. */
#define HAVE_REALPATH 1
/* Define to 1 if you have the `rintf' function. */
#define HAVE_RINTF 1
/* Define to 1 if you have the `round' function. */
#define HAVE_ROUND 1
/* Define to 1 if you have the `roundf' function. */
#define HAVE_ROUNDF 1
/* Define to 1 if you have the `sbrk' function. */
#define HAVE_SBRK 1
/* Define to 1 if you have the `setenv' function. */
#define HAVE_SETENV 1
/* Define to 1 if you have the `setjmp' function. */
#define HAVE_SETJMP 1
/* Define to 1 if you have the <setjmp.h> header file. */
#define HAVE_SETJMP_H 1
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT 1
/* Define if you have the shl_load function. */
/* #undef HAVE_SHL_LOAD */
/* Define to 1 if you have the `siglongjmp' function. */
#define HAVE_SIGLONGJMP 1
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H 1
/* Define to 1 if you have the `sigsetjmp' function. */
#define HAVE_SIGSETJMP 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Set to 1 if the std::isinf function is found in <cmath> */
#define HAVE_STD_ISINF_IN_CMATH 1
/* Set to 1 if the std::isnan function is found in <cmath> */
#define HAVE_STD_ISNAN_IN_CMATH 1
/* Define to 1 if you have the `strerror' function. */
#define HAVE_STRERROR 1
/* Define to 1 if you have the `strerror_r' function. */
#define HAVE_STRERROR_R 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the `strtof' function. */
#define HAVE_STRTOF 1
/* Define to 1 if you have the `strtoll' function. */
#define HAVE_STRTOLL 1
/* Define to 1 if you have the `strtoq' function. */
#define HAVE_STRTOQ 1
/* Define to 1 if you have the `sysconf' function. */
#define HAVE_SYSCONF 1
/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
*/
/* #undef HAVE_SYS_DIR_H */
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H 1
/* Define to 1 if you have the <sys/mman.h> header file. */
#define HAVE_SYS_MMAN_H 1
/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
*/
/* #undef HAVE_SYS_NDIR_H */
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1
/* Define to 1 if you have the <sys/resource.h> header file. */
#define HAVE_SYS_RESOURCE_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H 1
/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
#define HAVE_SYS_WAIT_H 1
/* Define if the setupterm() function is supported on this platform. */
#define HAVE_TERMINFO 1
/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1
/* Define if the twopi program is available */
/* #undef HAVE_TWOPI */
/* Define to 1 if the system has the type `uint64_t'. */
#define HAVE_UINT64_T 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if you have the <utime.h> header file. */
#define HAVE_UTIME_H 1
/* Define to 1 if the system has the type `u_int64_t'. */
/* #undef HAVE_U_INT64_T */
/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
/* #undef HAVE_VALGRIND_VALGRIND_H */
/* Define to 1 if you have the `writev' function. */
#define HAVE_WRITEV 1
/* Define if the xdot program is available */
/* #undef HAVE_XDOT */
/* Define to 1 if you have the <zlib.h> header file. */
#define HAVE_ZLIB_H 1
/* Have host's _alloca */
/* #undef HAVE__ALLOCA */
/* Have host's __alloca */
/* #undef HAVE___ALLOCA */
/* Have host's __ashldi3 */
/* #undef HAVE___ASHLDI3 */
/* Have host's __ashrdi3 */
/* #undef HAVE___ASHRDI3 */
/* Have host's __chkstk */
/* #undef HAVE___CHKSTK */
/* Have host's __cmpdi2 */
/* #undef HAVE___CMPDI2 */
/* Have host's __divdi3 */
/* #undef HAVE___DIVDI3 */
/* Define to 1 if you have the `__dso_handle' function. */
#define HAVE___DSO_HANDLE 1
/* Have host's __fixdfdi */
/* #undef HAVE___FIXDFDI */
/* Have host's __fixsfdi */
/* #undef HAVE___FIXSFDI */
/* Have host's __floatdidf */
/* #undef HAVE___FLOATDIDF */
/* Have host's __lshrdi3 */
/* #undef HAVE___LSHRDI3 */
/* Have host's __main */
/* #undef HAVE___MAIN */
/* Have host's __moddi3 */
/* #undef HAVE___MODDI3 */
/* Have host's __udivdi3 */
/* #undef HAVE___UDIVDI3 */
/* Have host's __umoddi3 */
/* #undef HAVE___UMODDI3 */
/* Have host's ___chkstk */
/* #undef HAVE____CHKSTK */
/* Linker version detected at compile time. */
/* #undef HOST_LINK_VERSION */
/* Installation directory for binary executables */
/* #undef LLVM_BINDIR */
/* Time at which LLVM was configured */
/* #undef LLVM_CONFIGTIME */
/* Installation directory for data files */
/* #undef LLVM_DATADIR */
/* Target triple LLVM will generate code for by default */
/* #undef LLVM_DEFAULT_TARGET_TRIPLE */
/* Installation directory for documentation */
/* #undef LLVM_DOCSDIR */
/* Define if threads enabled */
#define LLVM_ENABLE_THREADS 0
/* Define if zlib is enabled */
#define LLVM_ENABLE_ZLIB 1
/* Installation directory for config files */
/* #undef LLVM_ETCDIR */
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 0
/* Host triple LLVM will be executed on */
/* #undef LLVM_HOST_TRIPLE */
/* Installation directory for include files */
/* #undef LLVM_INCLUDEDIR */
/* Installation directory for .info files */
/* #undef LLVM_INFODIR */
/* Installation directory for man pages */
/* #undef LLVM_MANDIR */
/* LLVM architecture name for the native architecture, if available */
#define LLVM_NATIVE_ARCH X86
/* LLVM name for the native AsmParser init function, if available */
#define LLVM_NATIVE_ASMPARSER LLVMInitializeX86AsmParser
/* LLVM name for the native AsmPrinter init function, if available */
#define LLVM_NATIVE_ASMPRINTER LLVMInitializeX86AsmPrinter
/* LLVM name for the native Disassembler init function, if available */
#define LLVM_NATIVE_DISASSEMBLER LLVMInitializeX86Disassembler
/* LLVM name for the native Target init function, if available */
#define LLVM_NATIVE_TARGET LLVMInitializeX86Target
/* LLVM name for the native TargetInfo init function, if available */
#define LLVM_NATIVE_TARGETINFO LLVMInitializeX86TargetInfo
/* LLVM name for the native target MC init function, if available */
#define LLVM_NATIVE_TARGETMC LLVMInitializeX86TargetMC
/* Define if this is a Unixish platform */
#define LLVM_ON_UNIX 1
/* Define if this is a Win32ish platform */
/* #undef LLVM_ON_WIN32 */
/* Define to path to circo program if found or 'echo circo' otherwise */
/* #undef LLVM_PATH_CIRCO */
/* Define to path to dot program if found or 'echo dot' otherwise */
/* #undef LLVM_PATH_DOT */
/* Define to path to dotty program if found or 'echo dotty' otherwise */
/* #undef LLVM_PATH_DOTTY */
/* Define to path to fdp program if found or 'echo fdp' otherwise */
/* #undef LLVM_PATH_FDP */
/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
/* #undef LLVM_PATH_GRAPHVIZ */
/* Define to path to gv program if found or 'echo gv' otherwise */
/* #undef LLVM_PATH_GV */
/* Define to path to neato program if found or 'echo neato' otherwise */
/* #undef LLVM_PATH_NEATO */
/* Define to path to twopi program if found or 'echo twopi' otherwise */
/* #undef LLVM_PATH_TWOPI */
/* Define to path to xdot program if found or 'echo xdot' otherwise */
/* #undef LLVM_PATH_XDOT */
/* Installation prefix directory */
-#define LLVM_PREFIX ""
+#define LLVM_PREFIX "/usr"
/* Define if we have the Intel JIT API runtime support library */
#define LLVM_USE_INTEL_JITEVENTS 0
/* Define if we have the oprofile JIT-support library */
#define LLVM_USE_OPROFILE 0
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR 3
/* Minor version of the LLVM API */
#define LLVM_VERSION_MINOR 4
+/* Patch version of the LLVM API */
+#define LLVM_VERSION_PATCH 1
+
/* Define if the OS needs help to load dependent libraries for dlopen(). */
#define LTDL_DLOPEN_DEPLIBS 1
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#define LTDL_OBJDIR ".libs/"
/* Define to the extension used for shared libraries, say, ".so". */
#define LTDL_SHLIB_EXT ".so"
/* Define to the system default library search path. */
#define LTDL_SYSSEARCHPATH "/lib:/usr/lib"
/* Define if /dev/zero should be used when mapping RWX memory, or undefine if
it's not necessary */
/* #undef NEED_DEV_ZERO_FOR_MMAP */
/* Define if dlsym() requires a leading underscore in symbol names. */
/* #undef NEED_USCORE */
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "http://llvm.org/bugs/"
/* Define to the full name of this package. */
#define PACKAGE_NAME "LLVM"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "LLVM 3.4"
+#define PACKAGE_STRING "LLVM 3.4.1"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "llvm"
-/* Define to the home page for this package. */
-/* #undef PACKAGE_URL */
-
/* Define to the version of this package. */
-#define PACKAGE_VERSION "3.4"
+#define PACKAGE_VERSION "3.4.1"
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
/* #undef STAT_MACROS_BROKEN */
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME 1
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
/* #undef TM_IN_SYS_TIME */
/* Define if use udis86 library */
#define USE_UDIS86 0
/* Type of 1st arg on ELM Callback */
/* #undef WIN32_ELMCB_PCSTR */
-
-/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
- significant byte first (like Motorola and SPARC, unlike Intel). */
-#if defined AC_APPLE_UNIVERSAL_BUILD
-# if defined __BIG_ENDIAN__
-# define WORDS_BIGENDIAN 1
-# endif
-#else
-# ifndef WORDS_BIGENDIAN
-/* # undef WORDS_BIGENDIAN */
-# endif
-#endif
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to a type to use for `error_t' if it is not otherwise available. */
#define error_t int
/* Define to `int' if <sys/types.h> does not define. */
/* #undef pid_t */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
#endif
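(A hedged sketch of how the autoconf-style HAVE_* feature macros above are
typically consumed; describe_errno is a hypothetical helper, not part of the
tree:)
#include "llvm/Config/config.h"
#include <string.h>

static const char *describe_errno(int err, char *buf, size_t len) {
#if defined(HAVE_STRERROR_R)
    /* POSIX strerror_r returns 0 on success and fills buf. */
    return strerror_r(err, buf, len) == 0 ? buf : "unknown error";
#elif defined(HAVE_STRERROR)
    return strerror(err);
#else
    (void)err; (void)buf; (void)len;
    return "unknown error";
#endif
}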
Index: head/lib/clang/include/llvm/Config/llvm-config.h
===================================================================
--- head/lib/clang/include/llvm/Config/llvm-config.h (revision 265924)
+++ head/lib/clang/include/llvm/Config/llvm-config.h (revision 265925)
@@ -1,126 +1,126 @@
/* $FreeBSD$ */
/* include/llvm/Config/llvm-config.h. Generated from llvm-config.h.in by configure. */
/*===-- llvm/config/llvm-config.h - llvm configure variable -------*- C -*-===*/
/* */
/* The LLVM Compiler Infrastructure */
/* */
/* This file is distributed under the University of Illinois Open Source */
/* License. See LICENSE.TXT for details. */
/* */
/*===----------------------------------------------------------------------===*/
/* This file enumerates all of the llvm variables from configure so that
they can be in exported headers and won't override package specific
directives. This is a C file so we can include it in the llvm-c headers. */
/* To avoid multiple inclusions of these variables when we include the exported
headers and config.h, conditionally include these. */
/* TODO: This is a bit of a hack. */
#ifndef CONFIG_H
/* Installation directory for binary executables */
/* #undef LLVM_BINDIR */
/* Time at which LLVM was configured */
/* #undef LLVM_CONFIGTIME */
/* Installation directory for data files */
/* #undef LLVM_DATADIR */
/* Target triple LLVM will generate code for by default */
/* #undef LLVM_DEFAULT_TARGET_TRIPLE */
/* Installation directory for documentation */
/* #undef LLVM_DOCSDIR */
/* Define if threads enabled */
#define LLVM_ENABLE_THREADS 0
/* Installation directory for config files */
/* #undef LLVM_ETCDIR */
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 0
/* Host triple LLVM will be executed on */
/* #undef LLVM_HOST_TRIPLE */
/* Installation directory for include files */
/* #undef LLVM_INCLUDEDIR */
/* Installation directory for .info files */
/* #undef LLVM_INFODIR */
/* Installation directory for man pages */
/* #undef LLVM_MANDIR */
/* LLVM architecture name for the native architecture, if available */
#define LLVM_NATIVE_ARCH X86
/* LLVM name for the native AsmParser init function, if available */
#define LLVM_NATIVE_ASMPARSER LLVMInitializeX86AsmParser
/* LLVM name for the native AsmPrinter init function, if available */
#define LLVM_NATIVE_ASMPRINTER LLVMInitializeX86AsmPrinter
/* LLVM name for the native Disassembler init function, if available */
#define LLVM_NATIVE_DISASSEMBLER LLVMInitializeX86Disassembler
/* LLVM name for the native Target init function, if available */
#define LLVM_NATIVE_TARGET LLVMInitializeX86Target
/* LLVM name for the native TargetInfo init function, if available */
#define LLVM_NATIVE_TARGETINFO LLVMInitializeX86TargetInfo
/* LLVM name for the native target MC init function, if available */
#define LLVM_NATIVE_TARGETMC LLVMInitializeX86TargetMC
/* Define if this is a Unixish platform */
#define LLVM_ON_UNIX 1
/* Define if this is a Win32ish platform */
/* #undef LLVM_ON_WIN32 */
/* Define to path to circo program if found or 'echo circo' otherwise */
/* #undef LLVM_PATH_CIRCO */
/* Define to path to dot program if found or 'echo dot' otherwise */
/* #undef LLVM_PATH_DOT */
/* Define to path to dotty program if found or 'echo dotty' otherwise */
/* #undef LLVM_PATH_DOTTY */
/* Define to path to fdp program if found or 'echo fdp' otherwise */
/* #undef LLVM_PATH_FDP */
/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
/* #undef LLVM_PATH_GRAPHVIZ */
/* Define to path to gv program if found or 'echo gv' otherwise */
/* #undef LLVM_PATH_GV */
/* Define to path to neato program if found or 'echo neato' otherwise */
/* #undef LLVM_PATH_NEATO */
/* Define to path to twopi program if found or 'echo twopi' otherwise */
/* #undef LLVM_PATH_TWOPI */
/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
/* #undef LLVM_PATH_XDOT_PY */
/* Installation prefix directory */
-#define LLVM_PREFIX ""
+#define LLVM_PREFIX "/usr"
/* Define if we have the Intel JIT API runtime support library */
#define LLVM_USE_INTEL_JITEVENTS 0
/* Define if we have the oprofile JIT-support library */
#define LLVM_USE_OPROFILE 0
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR 3
/* Minor version of the LLVM API */
#define LLVM_VERSION_MINOR 4
#endif
Index: head/tools/build/mk/OptionalObsoleteFiles.inc
===================================================================
--- head/tools/build/mk/OptionalObsoleteFiles.inc (revision 265924)
+++ head/tools/build/mk/OptionalObsoleteFiles.inc (revision 265925)
@@ -1,4666 +1,4666 @@
#
# $FreeBSD$
#
# This file adds support for the WITHOUT_* and WITH_* knobs in src.conf(5) to
# the check-old and delete-old* targets.
#
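# For example (hedged; paths are illustrative): after putting
#   WITHOUT_ACCT=yes
# into src.conf(5), the MK_ACCT block below feeds the targets named above:
#   cd /usr/src
#   make check-old    # reports the stale OLD_FILES entries
#   make delete-old   # removes them
#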
.if ${MK_ACCT} == no
OLD_FILES+=etc/periodic/daily/310.accounting
OLD_FILES+=usr/sbin/accton
OLD_FILES+=usr/sbin/sa
OLD_FILES+=usr/share/man/man8/accton.8.gz
OLD_FILES+=usr/share/man/man8/sa.8.gz
.endif
.if ${MK_ACPI} == no
OLD_FILES+=usr/sbin/acpiconf
OLD_FILES+=usr/sbin/acpidb
OLD_FILES+=usr/sbin/acpidump
OLD_FILES+=usr/sbin/iasl
OLD_FILES+=usr/share/man/man8/acpiconf.8.gz
OLD_FILES+=usr/share/man/man8/acpidb.8.gz
OLD_FILES+=usr/share/man/man8/acpidump.8.gz
OLD_FILES+=usr/share/man/man8/iasl.8.gz
.endif
.if ${MK_AMD} == no
OLD_FILES+=etc/amd.map
OLD_FILES+=usr/bin/pawd
OLD_FILES+=usr/sbin/amd
OLD_FILES+=usr/sbin/amq
OLD_FILES+=usr/sbin/fixmount
OLD_FILES+=usr/sbin/fsinfo
OLD_FILES+=usr/sbin/hlfsd
OLD_FILES+=usr/sbin/mk-amd-map
OLD_FILES+=usr/sbin/wire-test
OLD_FILES+=usr/share/examples/etc/amd.map
OLD_FILES+=usr/share/info/am-utils.info.gz
OLD_FILES+=usr/share/man/man1/pawd.1.gz
OLD_FILES+=usr/share/man/man5/amd.conf.5.gz
OLD_FILES+=usr/share/man/man8/amd.8.gz
OLD_FILES+=usr/share/man/man8/amq.8.gz
OLD_FILES+=usr/share/man/man8/fixmount.8.gz
OLD_FILES+=usr/share/man/man8/fsinfo.8.gz
OLD_FILES+=usr/share/man/man8/hlfsd.8.gz
OLD_FILES+=usr/share/man/man8/mk-amd-map.8.gz
OLD_FILES+=usr/share/man/man8/wire-test.8.gz
.endif
.if ${MK_APM} == no
OLD_FILES+=etc/apmd.conf
OLD_FILES+=usr/sbin/apm
OLD_FILES+=usr/share/examples/etc/apmd.conf
OLD_FILES+=usr/share/man/man8/amd64/apm.8.gz
OLD_FILES+=usr/share/man/man8/amd64/apmconf.8.gz
.endif
.if ${MK_AT} == no
OLD_FILES+=usr/bin/at
OLD_FILES+=usr/bin/atq
OLD_FILES+=usr/bin/atrm
OLD_FILES+=usr/bin/batch
OLD_FILES+=usr/libexec/atrun
OLD_FILES+=usr/share/man/man1/at.1.gz
OLD_FILES+=usr/share/man/man1/atq.1.gz
OLD_FILES+=usr/share/man/man1/atrm.1.gz
OLD_FILES+=usr/share/man/man1/batch.1.gz
OLD_FILES+=usr/share/man/man8/atrun.8.gz
.endif
.if ${MK_ATM} == no
OLD_FILES+=rescue/atmconfig
OLD_FILES+=sbin/atmconfig
OLD_FILES+=usr/bin/sscop
OLD_FILES+=usr/include/bsnmp/snmp_atm.h
OLD_FILES+=usr/include/netnatm/addr.h
OLD_FILES+=usr/include/netnatm/api/atmapi.h
OLD_FILES+=usr/include/netnatm/api/ccatm.h
OLD_FILES+=usr/include/netnatm/api/unisap.h
OLD_DIRS+=usr/include/netnatm/api
OLD_FILES+=usr/include/netnatm/msg/uni_config.h
OLD_FILES+=usr/include/netnatm/msg/uni_hdr.h
OLD_FILES+=usr/include/netnatm/msg/uni_ie.h
OLD_FILES+=usr/include/netnatm/msg/uni_msg.h
OLD_FILES+=usr/include/netnatm/msg/unimsglib.h
OLD_FILES+=usr/include/netnatm/msg/uniprint.h
OLD_FILES+=usr/include/netnatm/msg/unistruct.h
OLD_DIRS+=usr/include/netnatm/msg
OLD_FILES+=usr/include/netnatm/saal/sscfu.h
OLD_FILES+=usr/include/netnatm/saal/sscfudef.h
OLD_FILES+=usr/include/netnatm/saal/sscop.h
OLD_FILES+=usr/include/netnatm/saal/sscopdef.h
OLD_DIRS+=usr/include/netnatm/saal
OLD_FILES+=usr/include/netnatm/sig/uni.h
OLD_FILES+=usr/include/netnatm/sig/unidef.h
OLD_FILES+=usr/include/netnatm/sig/unisig.h
OLD_DIRS+=usr/include/netnatm/sig
OLD_FILES+=usr/include/netnatm/unimsg.h
OLD_FILES+=usr/lib/libngatm.a
OLD_FILES+=usr/lib/libngatm.so
OLD_LIBS+=usr/lib/libngatm.so.4
OLD_FILES+=usr/lib/libngatm_p.a
OLD_FILES+=usr/lib/snmp_atm.so
OLD_LIBS+=usr/lib/snmp_atm.so.6
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libngatm.a
OLD_FILES+=usr/lib32/libngatm.so
OLD_LIBS+=usr/lib32/libngatm.so.4
OLD_FILES+=usr/lib32/libngatm_p.a
.endif
OLD_FILES+=usr/share/doc/atm/atmconfig.help
OLD_FILES+=usr/share/doc/atm/atmconfig_device.help
OLD_DIRS+=usr/share/doc/atm
OLD_FILES+=usr/share/man/man1/sscop.1.gz
OLD_FILES+=usr/share/man/man3/libngatm.3.gz
OLD_FILES+=usr/share/man/man3/snmp_atm.3.gz
OLD_FILES+=usr/share/man/man3/uniaddr.3.gz
OLD_FILES+=usr/share/man/man3/unifunc.3.gz
OLD_FILES+=usr/share/man/man3/unimsg.3.gz
OLD_FILES+=usr/share/man/man3/unisap.3.gz
OLD_FILES+=usr/share/man/man3/unistruct.3.gz
OLD_FILES+=usr/share/man/man8/atmconfig.8.gz
OLD_FILES+=usr/share/snmp/defs/atm_freebsd.def
OLD_FILES+=usr/share/snmp/defs/atm_tree.def
OLD_FILES+=usr/share/snmp/mibs/BEGEMOT-ATM-FREEBSD-MIB.txt
OLD_FILES+=usr/share/snmp/mibs/BEGEMOT-ATM.txt
.endif
.if ${MK_AUDIT} == no
OLD_FILES+=usr/sbin/audit
OLD_FILES+=usr/sbin/auditd
OLD_FILES+=usr/sbin/auditreduce
OLD_FILES+=usr/sbin/praudit
OLD_FILES+=usr/share/man/man1/auditreduce.1.gz
OLD_FILES+=usr/share/man/man1/praudit.1.gz
OLD_FILES+=usr/share/man/man8/audit.8.gz
OLD_FILES+=usr/share/man/man8/auditd.8.gz
.endif
.if ${MK_AUTHPF} == no
OLD_FILES+=usr/sbin/authpf
OLD_FILES+=usr/sbin/authpf-noip
OLD_FILES+=usr/share/man/man8/authpf.8.gz
OLD_FILES+=usr/share/man/man8/authpf-noip.8.gz
.endif
.if ${MK_BLUETOOTH} == no
OLD_FILES+=etc/bluetooth/hcsecd.conf
OLD_FILES+=etc/bluetooth/hosts
OLD_FILES+=etc/bluetooth/protocols
OLD_DIRS+=etc/bluetooth
OLD_FILES+=usr/bin/bthost
OLD_FILES+=usr/bin/btsockstat
OLD_FILES+=usr/bin/rfcomm_sppd
OLD_FILES+=usr/include/bluetooth.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_bluetooth.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_bt3c.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_btsocket.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_btsocket_hci_raw.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_btsocket_l2cap.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_btsocket_rfcomm.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_btsocket_sco.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_h4.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_hci.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_l2cap.h
OLD_FILES+=usr/include/netgraph/bluetooth/include/ng_ubt.h
OLD_DIRS+=usr/include/netgraph/bluetooth/include
OLD_DIRS+=usr/include/netgraph/bluetooth
OLD_FILES+=usr/include/sdp.h
OLD_FILES+=usr/lib/libbluetooth.a
OLD_FILES+=usr/lib/libbluetooth.so
OLD_LIBS+=usr/lib/libbluetooth.so.4
OLD_FILES+=usr/lib/libbluetooth_p.a
OLD_FILES+=usr/lib/libsdp.a
OLD_FILES+=usr/lib/libsdp.so
OLD_LIBS+=usr/lib/libsdp.so.4
OLD_FILES+=usr/lib/libsdp_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libbluetooth.a
OLD_FILES+=usr/lib32/libbluetooth.so
OLD_LIBS+=usr/lib32/libbluetooth.so.4
OLD_FILES+=usr/lib32/libbluetooth_p.a
OLD_FILES+=usr/lib32/libsdp.a
OLD_FILES+=usr/lib32/libsdp.so
OLD_LIBS+=usr/lib32/libsdp.so.4
OLD_FILES+=usr/lib32/libsdp_p.a
.endif
OLD_FILES+=usr/sbin/bcmfw
OLD_FILES+=usr/sbin/bt3cfw
OLD_FILES+=usr/sbin/bthidcontrol
OLD_FILES+=usr/sbin/bthidd
OLD_FILES+=usr/sbin/btpand
OLD_FILES+=usr/sbin/hccontrol
OLD_FILES+=usr/sbin/hcsecd
OLD_FILES+=usr/sbin/hcseriald
OLD_FILES+=usr/sbin/l2control
OLD_FILES+=usr/sbin/l2ping
OLD_FILES+=usr/sbin/rfcomm_pppd
OLD_FILES+=usr/sbin/sdpcontrol
OLD_FILES+=usr/sbin/sdpd
OLD_FILES+=usr/share/man/man1/bthost.1.gz
OLD_FILES+=usr/share/man/man1/btsockstat.1.gz
OLD_FILES+=usr/share/man/man1/rfcomm_sppd.1.gz
OLD_FILES+=usr/share/man/man3/SDP_GET128.3.gz
OLD_FILES+=usr/share/man/man3/SDP_GET16.3.gz
OLD_FILES+=usr/share/man/man3/SDP_GET32.3.gz
OLD_FILES+=usr/share/man/man3/SDP_GET64.3.gz
OLD_FILES+=usr/share/man/man3/SDP_GET8.3.gz
OLD_FILES+=usr/share/man/man3/SDP_PUT128.3.gz
OLD_FILES+=usr/share/man/man3/SDP_PUT16.3.gz
OLD_FILES+=usr/share/man/man3/SDP_PUT32.3.gz
OLD_FILES+=usr/share/man/man3/SDP_PUT64.3.gz
OLD_FILES+=usr/share/man/man3/SDP_PUT8.3.gz
OLD_FILES+=usr/share/man/man3/bdaddr_any.3.gz
OLD_FILES+=usr/share/man/man3/bdaddr_copy.3.gz
OLD_FILES+=usr/share/man/man3/bdaddr_same.3.gz
OLD_FILES+=usr/share/man/man3/bluetooth.3.gz
OLD_FILES+=usr/share/man/man3/bt_aton.3.gz
OLD_FILES+=usr/share/man/man3/bt_devaddr.3.gz
OLD_FILES+=usr/share/man/man3/bt_devclose.3.gz
OLD_FILES+=usr/share/man/man3/bt_devenum.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter_evt_clr.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter_evt_set.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter_evt_tst.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter_pkt_clr.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter_pkt_set.3.gz
OLD_FILES+=usr/share/man/man3/bt_devfilter_pkt_tst.3.gz
OLD_FILES+=usr/share/man/man3/bt_devinfo.3.gz
OLD_FILES+=usr/share/man/man3/bt_devinquiry.3.gz
OLD_FILES+=usr/share/man/man3/bt_devname.3.gz
OLD_FILES+=usr/share/man/man3/bt_devopen.3.gz
OLD_FILES+=usr/share/man/man3/bt_devreq.3.gz
OLD_FILES+=usr/share/man/man3/bt_devsend.3.gz
OLD_FILES+=usr/share/man/man3/bt_endhostent.3.gz
OLD_FILES+=usr/share/man/man3/bt_endprotoent.3.gz
OLD_FILES+=usr/share/man/man3/bt_gethostbyaddr.3.gz
OLD_FILES+=usr/share/man/man3/bt_gethostbyname.3.gz
OLD_FILES+=usr/share/man/man3/bt_gethostent.3.gz
OLD_FILES+=usr/share/man/man3/bt_getprotobyname.3.gz
OLD_FILES+=usr/share/man/man3/bt_getprotobynumber.3.gz
OLD_FILES+=usr/share/man/man3/bt_getprotoent.3.gz
OLD_FILES+=usr/share/man/man3/bt_ntoa.3.gz
OLD_FILES+=usr/share/man/man3/bt_sethostent.3.gz
OLD_FILES+=usr/share/man/man3/bt_setprotoent.3.gz
OLD_FILES+=usr/share/man/man3/sdp.3.gz
OLD_FILES+=usr/share/man/man3/sdp_attr2desc.3.gz
OLD_FILES+=usr/share/man/man3/sdp_change_service.3.gz
OLD_FILES+=usr/share/man/man3/sdp_close.3.gz
OLD_FILES+=usr/share/man/man3/sdp_error.3.gz
OLD_FILES+=usr/share/man/man3/sdp_open.3.gz
OLD_FILES+=usr/share/man/man3/sdp_open_local.3.gz
OLD_FILES+=usr/share/man/man3/sdp_register_service.3.gz
OLD_FILES+=usr/share/man/man3/sdp_search.3.gz
OLD_FILES+=usr/share/man/man3/sdp_unregister_service.3.gz
OLD_FILES+=usr/share/man/man3/sdp_uuid2desc.3.gz
OLD_FILES+=usr/share/man/man5/hcsecd.conf.5.gz
OLD_FILES+=usr/share/man/man8/bcmfw.8.gz
OLD_FILES+=usr/share/man/man8/bt3cfw.8.gz
OLD_FILES+=usr/share/man/man8/bthidcontrol.8.gz
OLD_FILES+=usr/share/man/man8/bthidd.8.gz
OLD_FILES+=usr/share/man/man8/btpand.8.gz
OLD_FILES+=usr/share/man/man8/hccontrol.8.gz
OLD_FILES+=usr/share/man/man8/hcsecd.8.gz
OLD_FILES+=usr/share/man/man8/hcseriald.8.gz
OLD_FILES+=usr/share/man/man8/l2control.8.gz
OLD_FILES+=usr/share/man/man8/l2ping.8.gz
OLD_FILES+=usr/share/man/man8/rfcomm_pppd.8.gz
OLD_FILES+=usr/share/man/man8/sdpcontrol.8.gz
OLD_FILES+=usr/share/man/man8/sdpd.8.gz
.endif
#.if ${MK_BOOT} == no
# to be filled in
#.endif
.if ${MK_CALENDAR} == no
OLD_FILES+=etc/periodic/daily/300.calendar
OLD_FILES+=usr/bin/calendar
OLD_FILES+=usr/share/calendar/calendar.all
OLD_FILES+=usr/share/calendar/calendar.australia
OLD_FILES+=usr/share/calendar/calendar.birthday
OLD_FILES+=usr/share/calendar/calendar.brazilian
OLD_FILES+=usr/share/calendar/calendar.christian
OLD_FILES+=usr/share/calendar/calendar.computer
OLD_FILES+=usr/share/calendar/calendar.croatian
OLD_FILES+=usr/share/calendar/calendar.dutch
OLD_FILES+=usr/share/calendar/calendar.freebsd
OLD_FILES+=usr/share/calendar/calendar.french
OLD_FILES+=usr/share/calendar/calendar.german
OLD_FILES+=usr/share/calendar/calendar.history
OLD_FILES+=usr/share/calendar/calendar.holiday
OLD_FILES+=usr/share/calendar/calendar.hungarian
OLD_FILES+=usr/share/calendar/calendar.judaic
OLD_FILES+=usr/share/calendar/calendar.lotr
OLD_FILES+=usr/share/calendar/calendar.music
OLD_FILES+=usr/share/calendar/calendar.newzealand
OLD_FILES+=usr/share/calendar/calendar.russian
OLD_FILES+=usr/share/calendar/calendar.southafrica
OLD_FILES+=usr/share/calendar/calendar.ukrainian
OLD_FILES+=usr/share/calendar/calendar.usholiday
OLD_FILES+=usr/share/calendar/calendar.world
OLD_FILES+=usr/share/calendar/de_AT.ISO_8859-15/calendar.feiertag
OLD_DIRS+=usr/share/calendar/de_AT.ISO_8859-15
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.all
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.feiertag
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.geschichte
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.kirche
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.literatur
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.musik
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-1/calendar.wissenschaft
OLD_DIRS+=usr/share/calendar/de_DE.ISO8859-1
OLD_FILES+=usr/share/calendar/de_DE.ISO8859-15
OLD_FILES+=usr/share/calendar/fr_FR.ISO8859-1/calendar.all
OLD_FILES+=usr/share/calendar/fr_FR.ISO8859-1/calendar.fetes
OLD_FILES+=usr/share/calendar/fr_FR.ISO8859-1/calendar.french
OLD_FILES+=usr/share/calendar/fr_FR.ISO8859-1/calendar.jferies
OLD_FILES+=usr/share/calendar/fr_FR.ISO8859-1/calendar.proverbes
OLD_DIRS+=usr/share/calendar/fr_FR.ISO8859-1
OLD_FILES+=usr/share/calendar/fr_FR.ISO8859-15
OLD_FILES+=usr/share/calendar/hr_HR.ISO8859-2/calendar.all
OLD_FILES+=usr/share/calendar/hr_HR.ISO8859-2/calendar.praznici
OLD_DIRS+=usr/share/calendar/hr_HR.ISO8859-2
OLD_FILES+=usr/share/calendar/hu_HU.ISO8859-2/calendar.all
OLD_FILES+=usr/share/calendar/hu_HU.ISO8859-2/calendar.nevnapok
OLD_FILES+=usr/share/calendar/hu_HU.ISO8859-2/calendar.unnepek
OLD_DIRS+=usr/share/calendar/hu_HU.ISO8859-2
OLD_FILES+=usr/share/calendar/pt_BR.ISO8859-1/calendar.all
OLD_FILES+=usr/share/calendar/pt_BR.ISO8859-1/calendar.commemorative
OLD_FILES+=usr/share/calendar/pt_BR.ISO8859-1/calendar.holidays
OLD_FILES+=usr/share/calendar/pt_BR.ISO8859-1/calendar.mcommemorative
OLD_DIRS+=usr/share/calendar/pt_BR.ISO8859-1
OLD_FILES+=usr/share/calendar/pt_BR.UTF-8/calendar.all
OLD_FILES+=usr/share/calendar/pt_BR.UTF-8/calendar.commemorative
OLD_FILES+=usr/share/calendar/pt_BR.UTF-8/calendar.holidays
OLD_FILES+=usr/share/calendar/pt_BR.UTF-8/calendar.mcommemorative
OLD_DIRS+=usr/share/calendar/pt_BR.UTF-8
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.all
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.common
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.holiday
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.military
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.orthodox
OLD_FILES+=usr/share/calendar/ru_RU.KOI8-R/calendar.pagan
OLD_DIRS+=usr/share/calendar/ru_RU.KOI8-R
OLD_FILES+=usr/share/calendar/ru_RU.UTF-8/calendar.all
OLD_FILES+=usr/share/calendar/ru_RU.UTF-8/calendar.common
OLD_FILES+=usr/share/calendar/ru_RU.UTF-8/calendar.holiday
OLD_FILES+=usr/share/calendar/ru_RU.UTF-8/calendar.military
OLD_FILES+=usr/share/calendar/ru_RU.UTF-8/calendar.orthodox
OLD_FILES+=usr/share/calendar/ru_RU.UTF-8/calendar.pagan
OLD_DIRS+=usr/share/calendar/ru_RU.UTF-8
OLD_FILES+=usr/share/calendar/uk_UA.KOI8-U/calendar.all
OLD_FILES+=usr/share/calendar/uk_UA.KOI8-U/calendar.holiday
OLD_FILES+=usr/share/calendar/uk_UA.KOI8-U/calendar.misc
OLD_FILES+=usr/share/calendar/uk_UA.KOI8-U/calendar.orthodox
OLD_DIRS+=usr/share/calendar/uk_UA.KOI8-U
OLD_DIRS+=usr/share/calendar
OLD_FILES+=usr/share/man/man1/calendar.1.gz
.endif
.if ${MK_CASPER} == no
OLD_FILES+=etc/rc.d/casperd
OLD_LIBS+=lib/libcasper.so.0
OLD_FILES+=sbin/casper
OLD_FILES+=usr/lib/libcasper.a
.endif
.if ${MK_CDDL} == no
OLD_LIBS+=lib/libavl.so.2
OLD_LIBS+=lib/libctf.so.2
OLD_LIBS+=lib/libdtrace.so.2
OLD_LIBS+=lib/libnvpair.so.2
OLD_LIBS+=lib/libumem.so.2
OLD_LIBS+=lib/libuutil.so.2
OLD_FILES+=usr/bin/ctfconvert
OLD_FILES+=usr/bin/ctfdump
OLD_FILES+=usr/bin/ctfmerge
OLD_FILES+=usr/bin/sgsmsg
OLD_FILES+=usr/lib/dtrace/drti.o
OLD_FILES+=usr/lib/dtrace/errno.d
OLD_FILES+=usr/lib/dtrace/io.d
OLD_FILES+=usr/lib/dtrace/ip.d
OLD_FILES+=usr/lib/dtrace/psinfo.d
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/lib/dtrace/regs_x86.d
.endif
OLD_FILES+=usr/lib/dtrace/signal.d
OLD_FILES+=usr/lib/dtrace/tcp.d
OLD_FILES+=usr/lib/dtrace/udp.d
OLD_FILES+=usr/lib/dtrace/unistd.d
OLD_DIRS+=usr/lib/dtrace
OLD_FILES+=usr/lib/libavl.a
OLD_FILES+=usr/lib/libavl.so
OLD_FILES+=usr/lib/libavl_p.a
OLD_FILES+=usr/lib/libctf.a
OLD_FILES+=usr/lib/libctf.so
OLD_FILES+=usr/lib/libctf_p.a
OLD_FILES+=usr/lib/libdtrace.a
OLD_FILES+=usr/lib/libdtrace.so
OLD_FILES+=usr/lib/libdtrace_p.a
OLD_FILES+=usr/lib/libnvpair.a
OLD_FILES+=usr/lib/libnvpair.so
OLD_FILES+=usr/lib/libnvpair_p.a
OLD_FILES+=usr/lib/libumem.a
OLD_FILES+=usr/lib/libumem.so
OLD_FILES+=usr/lib/libumem_p.a
OLD_FILES+=usr/lib/libuutil.a
OLD_FILES+=usr/lib/libuutil.so
OLD_FILES+=usr/lib/libuutil_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/dtrace/drti.o
OLD_DIRS+=usr/lib32/dtrace
OLD_FILES+=usr/lib32/libavl.a
OLD_FILES+=usr/lib32/libavl.so
OLD_LIBS+=usr/lib32/libavl.so.2
OLD_FILES+=usr/lib32/libavl_p.a
OLD_FILES+=usr/lib32/libctf.a
OLD_FILES+=usr/lib32/libctf.so
OLD_LIBS+=usr/lib32/libctf.so.2
OLD_FILES+=usr/lib32/libctf_p.a
OLD_FILES+=usr/lib32/libdtrace.a
OLD_FILES+=usr/lib32/libdtrace.so
OLD_LIBS+=usr/lib32/libdtrace.so.2
OLD_FILES+=usr/lib32/libdtrace_p.a
OLD_FILES+=usr/lib32/libnvpair.a
OLD_FILES+=usr/lib32/libnvpair.so
OLD_LIBS+=usr/lib32/libnvpair.so.2
OLD_FILES+=usr/lib32/libnvpair_p.a
OLD_FILES+=usr/lib32/libumem.a
OLD_FILES+=usr/lib32/libumem.so
OLD_LIBS+=usr/lib32/libumem.so.2
OLD_FILES+=usr/lib32/libumem_p.a
OLD_FILES+=usr/lib32/libuutil.a
OLD_FILES+=usr/lib32/libuutil.so
OLD_LIBS+=usr/lib32/libuutil.so.2
OLD_FILES+=usr/lib32/libuutil_p.a
.endif
OLD_FILES+=usr/sbin/dtrace
OLD_FILES+=usr/sbin/lockstat
OLD_FILES+=usr/share/man/man1/dtrace.1.gz
.endif
.if ${MK_ZFS} == no
OLD_FILES+=boot/gptzfsboot
OLD_FILES+=boot/zfsboot
OLD_FILES+=boot/zfsloader
OLD_FILES+=etc/periodic/daily/404.status-zfs
OLD_FILES+=etc/periodic/daily/800.scrub-zfs
OLD_LIBS+=lib/libzfs.so.2
OLD_LIBS+=lib/libzfs_core.so.2
OLD_LIBS+=lib/libzpool.so.2
OLD_FILES+=rescue/zfs
OLD_FILES+=rescue/zpool
OLD_FILES+=sbin/zfs
OLD_FILES+=sbin/zpool
OLD_FILES+=usr/bin/zinject
OLD_FILES+=usr/bin/ztest
OLD_FILES+=usr/lib/libzfs.a
OLD_FILES+=usr/lib/libzfs.so
OLD_FILES+=usr/lib/libzfs_core.a
OLD_FILES+=usr/lib/libzfs_core.so
OLD_FILES+=usr/lib/libzfs_core_p.a
OLD_FILES+=usr/lib/libzfs_p.a
OLD_FILES+=usr/lib/libzpool.a
OLD_FILES+=usr/lib/libzpool.so
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libzfs.a
OLD_FILES+=usr/lib32/libzfs.so
OLD_LIBS+=usr/lib32/libzfs.so.2
OLD_FILES+=usr/lib32/libzfs_core.a
OLD_FILES+=usr/lib32/libzfs_core.so
OLD_LIBS+=usr/lib32/libzfs_core.so.2
OLD_FILES+=usr/lib32/libzfs_core_p.a
OLD_FILES+=usr/lib32/libzfs_p.a
OLD_FILES+=usr/lib32/libzpool.a
OLD_FILES+=usr/lib32/libzpool.so
OLD_LIBS+=usr/lib32/libzpool.so.2
.endif
OLD_FILES+=usr/sbin/zdb
OLD_FILES+=usr/share/man/man8/zdb.8.gz
OLD_FILES+=usr/share/man/man8/zfs.8.gz
OLD_FILES+=usr/share/man/man8/zpool.8.gz
.endif
.if ${MK_CLANG} == no
OLD_FILES+=usr/bin/clang
OLD_FILES+=usr/bin/clang++
OLD_FILES+=usr/bin/clang-cpp
OLD_FILES+=usr/bin/clang-tblgen
OLD_FILES+=usr/bin/tblgen
-OLD_FILES+=usr/include/clang/3.4/__wmmintrin_aes.h
-OLD_FILES+=usr/include/clang/3.4/__wmmintrin_pclmul.h
-OLD_FILES+=usr/include/clang/3.4/altivec.h
-OLD_FILES+=usr/include/clang/3.4/ammintrin.h
-OLD_FILES+=usr/include/clang/3.4/arm_neon.h
-OLD_FILES+=usr/include/clang/3.4/avx2intrin.h
-OLD_FILES+=usr/include/clang/3.4/avxintrin.h
-OLD_FILES+=usr/include/clang/3.4/bmi2intrin.h
-OLD_FILES+=usr/include/clang/3.4/bmiintrin.h
-OLD_FILES+=usr/include/clang/3.4/cpuid.h
-OLD_FILES+=usr/include/clang/3.4/emmintrin.h
-OLD_FILES+=usr/include/clang/3.4/f16cintrin.h
-OLD_FILES+=usr/include/clang/3.4/fma4intrin.h
-OLD_FILES+=usr/include/clang/3.4/fmaintrin.h
-OLD_FILES+=usr/include/clang/3.4/immintrin.h
-OLD_FILES+=usr/include/clang/3.4/lzcntintrin.h
-OLD_FILES+=usr/include/clang/3.4/mm3dnow.h
-OLD_FILES+=usr/include/clang/3.4/mm_malloc.h
-OLD_FILES+=usr/include/clang/3.4/mmintrin.h
-OLD_FILES+=usr/include/clang/3.4/module.map
-OLD_FILES+=usr/include/clang/3.4/nmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/pmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/popcntintrin.h
-OLD_FILES+=usr/include/clang/3.4/prfchwintrin.h
-OLD_FILES+=usr/include/clang/3.4/rdseedintrin.h
-OLD_FILES+=usr/include/clang/3.4/rtmintrin.h
-OLD_FILES+=usr/include/clang/3.4/shaintrin.h
-OLD_FILES+=usr/include/clang/3.4/smmintrin.h
-OLD_FILES+=usr/include/clang/3.4/tbmintrin.h
-OLD_FILES+=usr/include/clang/3.4/tmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/wmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/x86intrin.h
-OLD_FILES+=usr/include/clang/3.4/xmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/xopintrin.h
-OLD_DIRS+=usr/include/clang/3.4
+OLD_FILES+=usr/include/clang/3.4.1/__wmmintrin_aes.h
+OLD_FILES+=usr/include/clang/3.4.1/__wmmintrin_pclmul.h
+OLD_FILES+=usr/include/clang/3.4.1/altivec.h
+OLD_FILES+=usr/include/clang/3.4.1/ammintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/arm_neon.h
+OLD_FILES+=usr/include/clang/3.4.1/avx2intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/avxintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/bmi2intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/bmiintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/cpuid.h
+OLD_FILES+=usr/include/clang/3.4.1/emmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/f16cintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/fma4intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/fmaintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/immintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/lzcntintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/mm3dnow.h
+OLD_FILES+=usr/include/clang/3.4.1/mm_malloc.h
+OLD_FILES+=usr/include/clang/3.4.1/mmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/module.map
+OLD_FILES+=usr/include/clang/3.4.1/nmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/pmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/popcntintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/prfchwintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/rdseedintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/rtmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/shaintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/smmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/tbmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/tmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/wmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/x86intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/xmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/xopintrin.h
+OLD_DIRS+=usr/include/clang/3.4.1
OLD_DIRS+=usr/include/clang
OLD_FILES+=usr/share/doc/llvm/clang/LICENSE.TXT
OLD_DIRS+=usr/share/doc/llvm/clang
OLD_FILES+=usr/share/doc/llvm/COPYRIGHT.regex
OLD_FILES+=usr/share/doc/llvm/LICENSE.TXT
OLD_DIRS+=usr/share/doc/llvm
OLD_FILES+=usr/share/man/man1/clang.1.gz
OLD_FILES+=usr/share/man/man1/clang++.1.gz
OLD_FILES+=usr/share/man/man1/clang-cpp.1.gz
OLD_FILES+=usr/share/man/man1/tblgen.1.gz
.endif
.if ${MK_CLANG_EXTRAS} == no
OLD_FILES+=usr/bin/bugpoint
OLD_FILES+=usr/bin/llc
OLD_FILES+=usr/bin/lli
OLD_FILES+=usr/bin/llvm-ar
OLD_FILES+=usr/bin/llvm-as
OLD_FILES+=usr/bin/llvm-bcanalyzer
OLD_FILES+=usr/bin/llvm-diff
OLD_FILES+=usr/bin/llvm-dis
OLD_FILES+=usr/bin/llvm-extract
OLD_FILES+=usr/bin/llvm-ld
OLD_FILES+=usr/bin/llvm-link
OLD_FILES+=usr/bin/llvm-mc
OLD_FILES+=usr/bin/llvm-nm
OLD_FILES+=usr/bin/llvm-objdump
OLD_FILES+=usr/bin/llvm-rtdyld
OLD_FILES+=usr/bin/llvm-stub
OLD_FILES+=usr/bin/macho-dump
OLD_FILES+=usr/bin/opt
OLD_FILES+=usr/share/man/man1/bugpoint.1.gz
OLD_FILES+=usr/share/man/man1/llc.1.gz
OLD_FILES+=usr/share/man/man1/lli.1.gz
OLD_FILES+=usr/share/man/man1/llvm-ar.1.gz
OLD_FILES+=usr/share/man/man1/llvm-as.1.gz
OLD_FILES+=usr/share/man/man1/llvm-bcanalyzer.1.gz
OLD_FILES+=usr/share/man/man1/llvm-diff.1.gz
OLD_FILES+=usr/share/man/man1/llvm-dis.1.gz
OLD_FILES+=usr/share/man/man1/llvm-extract.1.gz
OLD_FILES+=usr/share/man/man1/llvm-ld.1.gz
OLD_FILES+=usr/share/man/man1/llvm-link.1.gz
OLD_FILES+=usr/share/man/man1/llvm-nm.1.gz
OLD_FILES+=usr/share/man/man1/opt.1.gz
.endif
.if ${MK_CPP} == no
OLD_FILES+=usr/bin/cpp
OLD_FILES+=usr/share/man/man1/cpp.1.gz
.endif
#.if ${MK_CRYPT} == no
# to be filled in
#.endif
.if ${MK_CTM} == no
OLD_FILES+=usr/sbin/ctm
OLD_FILES+=usr/sbin/ctm_dequeue
OLD_FILES+=usr/sbin/ctm_rmail
OLD_FILES+=usr/sbin/ctm_smail
OLD_FILES+=usr/share/man/man1/ctm.1.gz
OLD_FILES+=usr/share/man/man1/ctm_dequeue.1.gz
OLD_FILES+=usr/share/man/man1/ctm_rmail.1.gz
OLD_FILES+=usr/share/man/man1/ctm_smail.1.gz
OLD_FILES+=usr/share/man/man5/ctm.5.gz
.endif
# devd(8) and gperf(1) not listed here on purpose
.if ${MK_CXX} == no
OLD_FILES+=usr/bin/CC
OLD_FILES+=usr/bin/c++
OLD_FILES+=usr/bin/c++filt
OLD_FILES+=usr/bin/g++
OLD_FILES+=usr/libexec/cc1plus
.endif
.if ${MK_FMTREE} == no
OLD_FILES+=usr/sbin/fmtree
OLD_FILES+=usr/share/man/man8/fmtree.8.gz
.endif
.if ${MK_GNUCXX} == no
OLD_FILES+=usr/bin/g++
OLD_FILES+=usr/include/c++/4.2/algorithm
OLD_FILES+=usr/include/c++/4.2/backward/algo.h
OLD_FILES+=usr/include/c++/4.2/backward/algobase.h
OLD_FILES+=usr/include/c++/4.2/backward/alloc.h
OLD_FILES+=usr/include/c++/4.2/backward/backward_warning.h
OLD_FILES+=usr/include/c++/4.2/backward/bvector.h
OLD_FILES+=usr/include/c++/4.2/backward/complex.h
OLD_FILES+=usr/include/c++/4.2/backward/defalloc.h
OLD_FILES+=usr/include/c++/4.2/backward/deque.h
OLD_FILES+=usr/include/c++/4.2/backward/fstream.h
OLD_FILES+=usr/include/c++/4.2/backward/function.h
OLD_FILES+=usr/include/c++/4.2/backward/hash_map.h
OLD_FILES+=usr/include/c++/4.2/backward/hash_set.h
OLD_FILES+=usr/include/c++/4.2/backward/hashtable.h
OLD_FILES+=usr/include/c++/4.2/backward/heap.h
OLD_FILES+=usr/include/c++/4.2/backward/iomanip.h
OLD_FILES+=usr/include/c++/4.2/backward/iostream.h
OLD_FILES+=usr/include/c++/4.2/backward/istream.h
OLD_FILES+=usr/include/c++/4.2/backward/iterator.h
OLD_FILES+=usr/include/c++/4.2/backward/list.h
OLD_FILES+=usr/include/c++/4.2/backward/map.h
OLD_FILES+=usr/include/c++/4.2/backward/multimap.h
OLD_FILES+=usr/include/c++/4.2/backward/multiset.h
OLD_FILES+=usr/include/c++/4.2/backward/new.h
OLD_FILES+=usr/include/c++/4.2/backward/ostream.h
OLD_FILES+=usr/include/c++/4.2/backward/pair.h
OLD_FILES+=usr/include/c++/4.2/backward/queue.h
OLD_FILES+=usr/include/c++/4.2/backward/rope.h
OLD_FILES+=usr/include/c++/4.2/backward/set.h
OLD_FILES+=usr/include/c++/4.2/backward/slist.h
OLD_FILES+=usr/include/c++/4.2/backward/stack.h
OLD_FILES+=usr/include/c++/4.2/backward/stream.h
OLD_FILES+=usr/include/c++/4.2/backward/streambuf.h
OLD_FILES+=usr/include/c++/4.2/backward/strstream
OLD_FILES+=usr/include/c++/4.2/backward/tempbuf.h
OLD_FILES+=usr/include/c++/4.2/backward/tree.h
OLD_FILES+=usr/include/c++/4.2/backward/vector.h
OLD_FILES+=usr/include/c++/4.2/bits/allocator.h
OLD_FILES+=usr/include/c++/4.2/bits/atomic_word.h
OLD_FILES+=usr/include/c++/4.2/bits/basic_file.h
OLD_FILES+=usr/include/c++/4.2/bits/basic_ios.h
OLD_FILES+=usr/include/c++/4.2/bits/basic_ios.tcc
OLD_FILES+=usr/include/c++/4.2/bits/basic_string.h
OLD_FILES+=usr/include/c++/4.2/bits/basic_string.tcc
OLD_FILES+=usr/include/c++/4.2/bits/boost_concept_check.h
OLD_FILES+=usr/include/c++/4.2/bits/c++allocator.h
OLD_FILES+=usr/include/c++/4.2/bits/c++config.h
OLD_FILES+=usr/include/c++/4.2/bits/c++io.h
OLD_FILES+=usr/include/c++/4.2/bits/c++locale.h
OLD_FILES+=usr/include/c++/4.2/bits/c++locale_internal.h
OLD_FILES+=usr/include/c++/4.2/bits/char_traits.h
OLD_FILES+=usr/include/c++/4.2/bits/cmath.tcc
OLD_FILES+=usr/include/c++/4.2/bits/codecvt.h
OLD_FILES+=usr/include/c++/4.2/bits/compatibility.h
OLD_FILES+=usr/include/c++/4.2/bits/concept_check.h
OLD_FILES+=usr/include/c++/4.2/bits/cpp_type_traits.h
OLD_FILES+=usr/include/c++/4.2/bits/cpu_defines.h
OLD_FILES+=usr/include/c++/4.2/bits/ctype_base.h
OLD_FILES+=usr/include/c++/4.2/bits/ctype_inline.h
OLD_FILES+=usr/include/c++/4.2/bits/ctype_noninline.h
OLD_FILES+=usr/include/c++/4.2/bits/cxxabi_tweaks.h
OLD_FILES+=usr/include/c++/4.2/bits/deque.tcc
OLD_FILES+=usr/include/c++/4.2/bits/fstream.tcc
OLD_FILES+=usr/include/c++/4.2/bits/functexcept.h
OLD_FILES+=usr/include/c++/4.2/bits/gslice.h
OLD_FILES+=usr/include/c++/4.2/bits/gslice_array.h
OLD_FILES+=usr/include/c++/4.2/bits/gthr-default.h
OLD_FILES+=usr/include/c++/4.2/bits/gthr-posix.h
OLD_FILES+=usr/include/c++/4.2/bits/gthr-single.h
OLD_FILES+=usr/include/c++/4.2/bits/gthr-tpf.h
OLD_FILES+=usr/include/c++/4.2/bits/gthr.h
OLD_FILES+=usr/include/c++/4.2/bits/indirect_array.h
OLD_FILES+=usr/include/c++/4.2/bits/ios_base.h
OLD_FILES+=usr/include/c++/4.2/bits/istream.tcc
OLD_FILES+=usr/include/c++/4.2/bits/list.tcc
OLD_FILES+=usr/include/c++/4.2/bits/locale_classes.h
OLD_FILES+=usr/include/c++/4.2/bits/locale_facets.h
OLD_FILES+=usr/include/c++/4.2/bits/locale_facets.tcc
OLD_FILES+=usr/include/c++/4.2/bits/localefwd.h
OLD_FILES+=usr/include/c++/4.2/bits/mask_array.h
OLD_FILES+=usr/include/c++/4.2/bits/messages_members.h
OLD_FILES+=usr/include/c++/4.2/bits/os_defines.h
OLD_FILES+=usr/include/c++/4.2/bits/ostream.tcc
OLD_FILES+=usr/include/c++/4.2/bits/ostream_insert.h
OLD_FILES+=usr/include/c++/4.2/bits/postypes.h
OLD_FILES+=usr/include/c++/4.2/bits/slice_array.h
OLD_FILES+=usr/include/c++/4.2/bits/sstream.tcc
OLD_FILES+=usr/include/c++/4.2/bits/stl_algo.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_algobase.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_bvector.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_construct.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_deque.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_function.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_heap.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_iterator.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_iterator_base_funcs.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_iterator_base_types.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_list.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_map.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_multimap.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_multiset.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_numeric.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_pair.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_queue.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_raw_storage_iter.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_relops.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_set.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_stack.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_tempbuf.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_tree.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_uninitialized.h
OLD_FILES+=usr/include/c++/4.2/bits/stl_vector.h
OLD_FILES+=usr/include/c++/4.2/bits/stream_iterator.h
OLD_FILES+=usr/include/c++/4.2/bits/streambuf.tcc
OLD_FILES+=usr/include/c++/4.2/bits/streambuf_iterator.h
OLD_FILES+=usr/include/c++/4.2/bits/stringfwd.h
OLD_FILES+=usr/include/c++/4.2/bits/time_members.h
OLD_FILES+=usr/include/c++/4.2/bits/valarray_after.h
OLD_FILES+=usr/include/c++/4.2/bits/valarray_array.h
OLD_FILES+=usr/include/c++/4.2/bits/valarray_array.tcc
OLD_FILES+=usr/include/c++/4.2/bits/valarray_before.h
OLD_FILES+=usr/include/c++/4.2/bits/vector.tcc
OLD_FILES+=usr/include/c++/4.2/bitset
OLD_FILES+=usr/include/c++/4.2/cassert
OLD_FILES+=usr/include/c++/4.2/cctype
OLD_FILES+=usr/include/c++/4.2/cerrno
OLD_FILES+=usr/include/c++/4.2/cfloat
OLD_FILES+=usr/include/c++/4.2/ciso646
OLD_FILES+=usr/include/c++/4.2/climits
OLD_FILES+=usr/include/c++/4.2/clocale
OLD_FILES+=usr/include/c++/4.2/cmath
OLD_FILES+=usr/include/c++/4.2/complex
OLD_FILES+=usr/include/c++/4.2/csetjmp
OLD_FILES+=usr/include/c++/4.2/csignal
OLD_FILES+=usr/include/c++/4.2/cstdarg
OLD_FILES+=usr/include/c++/4.2/cstddef
OLD_FILES+=usr/include/c++/4.2/cstdio
OLD_FILES+=usr/include/c++/4.2/cstdlib
OLD_FILES+=usr/include/c++/4.2/cstring
OLD_FILES+=usr/include/c++/4.2/ctime
OLD_FILES+=usr/include/c++/4.2/cwchar
OLD_FILES+=usr/include/c++/4.2/cwctype
OLD_FILES+=usr/include/c++/4.2/cxxabi.h
OLD_FILES+=usr/include/c++/4.2/debug/bitset
OLD_FILES+=usr/include/c++/4.2/debug/debug.h
OLD_FILES+=usr/include/c++/4.2/debug/deque
OLD_FILES+=usr/include/c++/4.2/debug/formatter.h
OLD_FILES+=usr/include/c++/4.2/debug/functions.h
OLD_FILES+=usr/include/c++/4.2/debug/hash_map
OLD_FILES+=usr/include/c++/4.2/debug/hash_map.h
OLD_FILES+=usr/include/c++/4.2/debug/hash_multimap.h
OLD_FILES+=usr/include/c++/4.2/debug/hash_multiset.h
OLD_FILES+=usr/include/c++/4.2/debug/hash_set
OLD_FILES+=usr/include/c++/4.2/debug/hash_set.h
OLD_FILES+=usr/include/c++/4.2/debug/list
OLD_FILES+=usr/include/c++/4.2/debug/macros.h
OLD_FILES+=usr/include/c++/4.2/debug/map
OLD_FILES+=usr/include/c++/4.2/debug/map.h
OLD_FILES+=usr/include/c++/4.2/debug/multimap.h
OLD_FILES+=usr/include/c++/4.2/debug/multiset.h
OLD_FILES+=usr/include/c++/4.2/debug/safe_base.h
OLD_FILES+=usr/include/c++/4.2/debug/safe_iterator.h
OLD_FILES+=usr/include/c++/4.2/debug/safe_iterator.tcc
OLD_FILES+=usr/include/c++/4.2/debug/safe_sequence.h
OLD_FILES+=usr/include/c++/4.2/debug/set
OLD_FILES+=usr/include/c++/4.2/debug/set.h
OLD_FILES+=usr/include/c++/4.2/debug/string
OLD_FILES+=usr/include/c++/4.2/debug/vector
OLD_FILES+=usr/include/c++/4.2/deque
OLD_FILES+=usr/include/c++/4.2/exception
OLD_FILES+=usr/include/c++/4.2/exception_defines.h
OLD_FILES+=usr/include/c++/4.2/ext/algorithm
OLD_FILES+=usr/include/c++/4.2/ext/array_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/atomicity.h
OLD_FILES+=usr/include/c++/4.2/ext/bitmap_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/codecvt_specializations.h
OLD_FILES+=usr/include/c++/4.2/ext/concurrence.h
OLD_FILES+=usr/include/c++/4.2/ext/debug_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/functional
OLD_FILES+=usr/include/c++/4.2/ext/hash_fun.h
OLD_FILES+=usr/include/c++/4.2/ext/hash_map
OLD_FILES+=usr/include/c++/4.2/ext/hash_set
OLD_FILES+=usr/include/c++/4.2/ext/hashtable.h
OLD_FILES+=usr/include/c++/4.2/ext/iterator
OLD_FILES+=usr/include/c++/4.2/ext/malloc_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/memory
OLD_FILES+=usr/include/c++/4.2/ext/mt_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/new_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/numeric
OLD_FILES+=usr/include/c++/4.2/ext/numeric_traits.h
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/assoc_container.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/basic_tree_policy/basic_tree_policy_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/basic_tree_policy/null_node_metadata.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/basic_tree_policy/traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/basic_types.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/cond_dtor_entry_dealtor.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/cond_key_dtor_entry_dealtor.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/node_iterators.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/r_erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/bin_search_tree_/traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/binary_heap_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/const_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/const_point_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/entry_cmp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/entry_pred.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/resize_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binary_heap_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_/binomial_heap_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/binomial_heap_base_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/binomial_heap_base_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/cc_ht_map_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/cmp_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/cond_key_dtor_entry_dealtor.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/debug_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/debug_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/entry_list_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/erase_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/erase_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/find_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/insert_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/insert_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/resize_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/resize_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/resize_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/size_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/standard_policies.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cc_hash_table_map_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/cond_dealtor.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/container_base_dispatch.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/eq_fn/eq_by_less.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/eq_fn/hash_eq_fn.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/debug_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/debug_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/erase_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/erase_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/find_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/find_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/gp_ht_map_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/insert_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/insert_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/iterator_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/resize_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/resize_no_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/resize_store_hash_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/standard_policies.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/gp_hash_table_map_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/direct_mask_range_hashing_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/direct_mod_range_hashing_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/linear_probe_fn_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/mask_based_range_hashing.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/mod_based_range_hashing.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/probe_fn_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/quadratic_probe_fn_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/ranged_hash_fn.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/ranged_probe_fn.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/sample_probe_fn.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/sample_range_hashing.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/sample_ranged_hash_fn.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/hash_fn/sample_ranged_probe_fn.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/const_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/const_point_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/left_child_next_sibling_heap_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/node.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/null_metadata.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/left_child_next_sibling_heap_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/constructor_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/entry_metadata_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/lu_map_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_map_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_policy/counter_lu_metadata.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_policy/counter_lu_policy_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_policy/mtf_lu_policy_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/list_update_policy/sample_update_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/map_debug_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/cond_dtor.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/node_iterators.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/ov_tree_map_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/ov_tree_map_/traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/pairing_heap_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pairing_heap_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/child_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/cond_dtor_entry_dealtor.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/const_child_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/head.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/insert_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/internal_node.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/iterators_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/leaf.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/node_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/node_iterators.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/node_metadata_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/pat_trie_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/point_iterators.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/policy_access_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/r_erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/rotate_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/split_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/split_join_branch_bag.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/synth_e_access_traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/pat_trie_/update_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/priority_queue_base_dispatch.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/node.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/rb_tree_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rb_tree_map_/traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/rc.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/rc_binomial_heap_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/rc_binomial_heap_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/cc_hash_max_collision_check_resize_trigger_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/hash_exponential_size_policy_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_size_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/hash_prime_size_policy_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/hash_standard_resize_policy_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/sample_resize_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/sample_resize_trigger.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/resize_policy/sample_size_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/info_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/node.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/splay_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/splay_tree_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/splay_tree_/traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/standard_policies.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/constructors_destructor_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/debug_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/erase_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/find_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/insert_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/split_join_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/thin_heap_.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/thin_heap_/trace_fn_imps.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/tree_policy/node_metadata_selector.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/tree_policy/null_node_update_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/tree_policy/sample_tree_node_update.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/tree_trace_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/node_metadata_selector.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/null_node_update_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/order_statistics_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/prefix_search_node_update_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/sample_trie_e_access_traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/sample_trie_node_update.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/string_trie_e_access_traits_imp.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/trie_policy/trie_policy_base.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/type_utils.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/types_traits.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/unordered_iterator/const_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/unordered_iterator/const_point_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/unordered_iterator/iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/detail/unordered_iterator/point_iterator.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/exception.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/hash_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/list_update_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/priority_queue.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/tag_and_trait.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/tree_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pb_ds/trie_policy.hpp
OLD_FILES+=usr/include/c++/4.2/ext/pod_char_traits.h
OLD_FILES+=usr/include/c++/4.2/ext/pool_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/rb_tree
OLD_FILES+=usr/include/c++/4.2/ext/rc_string_base.h
OLD_FILES+=usr/include/c++/4.2/ext/rope
OLD_FILES+=usr/include/c++/4.2/ext/ropeimpl.h
OLD_FILES+=usr/include/c++/4.2/ext/slist
OLD_FILES+=usr/include/c++/4.2/ext/sso_string_base.h
OLD_FILES+=usr/include/c++/4.2/ext/stdio_filebuf.h
OLD_FILES+=usr/include/c++/4.2/ext/stdio_sync_filebuf.h
OLD_FILES+=usr/include/c++/4.2/ext/throw_allocator.h
OLD_FILES+=usr/include/c++/4.2/ext/type_traits.h
OLD_FILES+=usr/include/c++/4.2/ext/typelist.h
OLD_FILES+=usr/include/c++/4.2/ext/vstring.h
OLD_FILES+=usr/include/c++/4.2/ext/vstring.tcc
OLD_FILES+=usr/include/c++/4.2/ext/vstring_fwd.h
OLD_FILES+=usr/include/c++/4.2/ext/vstring_util.h
OLD_FILES+=usr/include/c++/4.2/fstream
OLD_FILES+=usr/include/c++/4.2/functional
OLD_FILES+=usr/include/c++/4.2/iomanip
OLD_FILES+=usr/include/c++/4.2/ios
OLD_FILES+=usr/include/c++/4.2/iosfwd
OLD_FILES+=usr/include/c++/4.2/iostream
OLD_FILES+=usr/include/c++/4.2/istream
OLD_FILES+=usr/include/c++/4.2/iterator
OLD_FILES+=usr/include/c++/4.2/limits
OLD_FILES+=usr/include/c++/4.2/list
OLD_FILES+=usr/include/c++/4.2/locale
OLD_FILES+=usr/include/c++/4.2/map
OLD_FILES+=usr/include/c++/4.2/memory
OLD_FILES+=usr/include/c++/4.2/new
OLD_FILES+=usr/include/c++/4.2/numeric
OLD_FILES+=usr/include/c++/4.2/ostream
OLD_FILES+=usr/include/c++/4.2/queue
OLD_FILES+=usr/include/c++/4.2/set
OLD_FILES+=usr/include/c++/4.2/sstream
OLD_FILES+=usr/include/c++/4.2/stack
OLD_FILES+=usr/include/c++/4.2/stdexcept
OLD_FILES+=usr/include/c++/4.2/streambuf
OLD_FILES+=usr/include/c++/4.2/string
OLD_FILES+=usr/include/c++/4.2/tr1/array
OLD_FILES+=usr/include/c++/4.2/tr1/bind_iterate.h
OLD_FILES+=usr/include/c++/4.2/tr1/bind_repeat.h
OLD_FILES+=usr/include/c++/4.2/tr1/boost_shared_ptr.h
OLD_FILES+=usr/include/c++/4.2/tr1/cctype
OLD_FILES+=usr/include/c++/4.2/tr1/cfenv
OLD_FILES+=usr/include/c++/4.2/tr1/cfloat
OLD_FILES+=usr/include/c++/4.2/tr1/cinttypes
OLD_FILES+=usr/include/c++/4.2/tr1/climits
OLD_FILES+=usr/include/c++/4.2/tr1/cmath
OLD_FILES+=usr/include/c++/4.2/tr1/common.h
OLD_FILES+=usr/include/c++/4.2/tr1/complex
OLD_FILES+=usr/include/c++/4.2/tr1/cstdarg
OLD_FILES+=usr/include/c++/4.2/tr1/cstdbool
OLD_FILES+=usr/include/c++/4.2/tr1/cstdint
OLD_FILES+=usr/include/c++/4.2/tr1/cstdio
OLD_FILES+=usr/include/c++/4.2/tr1/cstdlib
OLD_FILES+=usr/include/c++/4.2/tr1/ctgmath
OLD_FILES+=usr/include/c++/4.2/tr1/ctime
OLD_FILES+=usr/include/c++/4.2/tr1/ctype.h
OLD_FILES+=usr/include/c++/4.2/tr1/cwchar
OLD_FILES+=usr/include/c++/4.2/tr1/cwctype
OLD_FILES+=usr/include/c++/4.2/tr1/fenv.h
OLD_FILES+=usr/include/c++/4.2/tr1/float.h
OLD_FILES+=usr/include/c++/4.2/tr1/functional
OLD_FILES+=usr/include/c++/4.2/tr1/functional_hash.h
OLD_FILES+=usr/include/c++/4.2/tr1/functional_iterate.h
OLD_FILES+=usr/include/c++/4.2/tr1/hashtable
OLD_FILES+=usr/include/c++/4.2/tr1/hashtable_policy.h
OLD_FILES+=usr/include/c++/4.2/tr1/inttypes.h
OLD_FILES+=usr/include/c++/4.2/tr1/limits.h
OLD_FILES+=usr/include/c++/4.2/tr1/math.h
OLD_FILES+=usr/include/c++/4.2/tr1/memory
OLD_FILES+=usr/include/c++/4.2/tr1/mu_iterate.h
OLD_FILES+=usr/include/c++/4.2/tr1/random
OLD_FILES+=usr/include/c++/4.2/tr1/random.tcc
OLD_FILES+=usr/include/c++/4.2/tr1/ref_fwd.h
OLD_FILES+=usr/include/c++/4.2/tr1/ref_wrap_iterate.h
OLD_FILES+=usr/include/c++/4.2/tr1/repeat.h
OLD_FILES+=usr/include/c++/4.2/tr1/stdarg.h
OLD_FILES+=usr/include/c++/4.2/tr1/stdbool.h
OLD_FILES+=usr/include/c++/4.2/tr1/stdint.h
OLD_FILES+=usr/include/c++/4.2/tr1/stdio.h
OLD_FILES+=usr/include/c++/4.2/tr1/stdlib.h
OLD_FILES+=usr/include/c++/4.2/tr1/tgmath.h
OLD_FILES+=usr/include/c++/4.2/tr1/tuple
OLD_FILES+=usr/include/c++/4.2/tr1/tuple_defs.h
OLD_FILES+=usr/include/c++/4.2/tr1/tuple_iterate.h
OLD_FILES+=usr/include/c++/4.2/tr1/type_traits
OLD_FILES+=usr/include/c++/4.2/tr1/type_traits_fwd.h
OLD_FILES+=usr/include/c++/4.2/tr1/unordered_map
OLD_FILES+=usr/include/c++/4.2/tr1/unordered_set
OLD_FILES+=usr/include/c++/4.2/tr1/utility
OLD_FILES+=usr/include/c++/4.2/tr1/wchar.h
OLD_FILES+=usr/include/c++/4.2/tr1/wctype.h
OLD_FILES+=usr/include/c++/4.2/typeinfo
OLD_FILES+=usr/include/c++/4.2/utility
OLD_FILES+=usr/include/c++/4.2/valarray
OLD_FILES+=usr/include/c++/4.2/vector
OLD_FILES+=usr/lib/libstdc++.a
OLD_FILES+=usr/lib/libstdc++.so
OLD_LIBS+=usr/lib/libstdc++.so.6
OLD_FILES+=usr/lib/libstdc++_p.a
OLD_FILES+=usr/lib/libsupc++.a
OLD_FILES+=usr/lib/libsupc++.so
OLD_LIBS+=usr/lib/libsupc++.so.1
OLD_FILES+=usr/lib/libsupc++_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libstdc++.a
OLD_FILES+=usr/lib32/libstdc++.so
OLD_LIBS+=usr/lib32/libstdc++.so.6
OLD_FILES+=usr/lib32/libstdc++_p.a
OLD_FILES+=usr/lib32/libsupc++.a
OLD_FILES+=usr/lib32/libsupc++.so
OLD_LIBS+=usr/lib32/libsupc++.so.1
OLD_FILES+=usr/lib32/libsupc++_p.a
.endif
OLD_FILES+=usr/libexec/cc1plus
.endif
.if ${MK_DICT} == no
OLD_FILES+=usr/share/dict/README
OLD_FILES+=usr/share/dict/eign
OLD_FILES+=usr/share/dict/freebsd
OLD_FILES+=usr/share/dict/propernames
OLD_FILES+=usr/share/dict/web2
OLD_FILES+=usr/share/dict/web2a
OLD_FILES+=usr/share/dict/words
OLD_DIRS+=usr/share/dict
.endif
#.if ${MK_EXAMPLES} == no
# to be filled in
#.endif
.if ${MK_FLOPPY} == no
OLD_FILES+=usr/sbin/fdcontrol
OLD_FILES+=usr/sbin/fdformat
OLD_FILES+=usr/sbin/fdread
OLD_FILES+=usr/sbin/fdwrite
OLD_FILES+=usr/share/man/man1/fdformat.1.gz
OLD_FILES+=usr/share/man/man1/fdread.1.gz
OLD_FILES+=usr/share/man/man1/fdwrite.1.gz
OLD_FILES+=usr/share/man/man8/fdcontrol.8.gz
.endif
.if ${MK_FREEBSD_UPDATE} == no
OLD_FILES+=etc/freebsd-update.conf
OLD_FILES+=usr/sbin/freebsd-update
OLD_FILES+=usr/share/examples/etc/freebsd-update.conf
OLD_FILES+=usr/share/man/man5/freebsd-update.conf.5.gz
OLD_FILES+=usr/share/man/man8/freebsd-update.8.gz
.endif
.if ${MK_GAMES} == no
OLD_FILES+=usr/games/bcd
OLD_FILES+=usr/games/caesar
OLD_FILES+=usr/games/factor
OLD_FILES+=usr/games/fortune
OLD_FILES+=usr/games/grdc
OLD_FILES+=usr/games/morse
OLD_FILES+=usr/games/number
OLD_FILES+=usr/games/pom
OLD_FILES+=usr/games/ppt
OLD_FILES+=usr/games/primes
OLD_FILES+=usr/games/random
OLD_FILES+=usr/games/rot13
OLD_FILES+=usr/games/strfile
OLD_FILES+=usr/games/unstr
OLD_DIRS+=usr/games
OLD_FILES+=usr/share/games/fortune/fortunes
OLD_FILES+=usr/share/games/fortune/fortunes.dat
OLD_FILES+=usr/share/games/fortune/freebsd-tips
OLD_FILES+=usr/share/games/fortune/freebsd-tips.dat
OLD_FILES+=usr/share/games/fortune/gerrold.limerick
OLD_FILES+=usr/share/games/fortune/gerrold.limerick.dat
OLD_FILES+=usr/share/games/fortune/limerick
OLD_FILES+=usr/share/games/fortune/limerick.dat
OLD_FILES+=usr/share/games/fortune/murphy
OLD_FILES+=usr/share/games/fortune/murphy-o
OLD_FILES+=usr/share/games/fortune/murphy-o.dat
OLD_FILES+=usr/share/games/fortune/murphy.dat
OLD_FILES+=usr/share/games/fortune/startrek
OLD_FILES+=usr/share/games/fortune/startrek.dat
OLD_FILES+=usr/share/games/fortune/zippy
OLD_FILES+=usr/share/games/fortune/zippy.dat
OLD_DIRS+=usr/share/games/fortune
OLD_DIRS+=usr/share/games
OLD_FILES+=usr/share/man/man6/bcd.6.gz
OLD_FILES+=usr/share/man/man6/caesar.6.gz
OLD_FILES+=usr/share/man/man6/factor.6.gz
OLD_FILES+=usr/share/man/man6/fortune.6.gz
OLD_FILES+=usr/share/man/man6/grdc.6.gz
OLD_FILES+=usr/share/man/man6/morse.6.gz
OLD_FILES+=usr/share/man/man6/number.6.gz
OLD_FILES+=usr/share/man/man6/pom.6.gz
OLD_FILES+=usr/share/man/man6/ppt.6.gz
OLD_FILES+=usr/share/man/man6/primes.6.gz
OLD_FILES+=usr/share/man/man6/random.6.gz
OLD_FILES+=usr/share/man/man6/rot13.6.gz
OLD_FILES+=usr/share/man/man8/strfile.8.gz
OLD_FILES+=usr/share/man/man8/unstr.8.gz
.endif
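# A usage sketch, assuming a checked-out source tree at /usr/src: with a
# knob that is on by default (e.g. MK_GAMES) turned off via
# -DWITHOUT_GAMES, the block above contributes its entries, which the
# top-level check-old target reports and the delete-old / delete-old-libs
# targets then remove:
# cd /usr/src
# make -DWITHOUT_GAMES check-old
# make -DWITHOUT_GAMES delete-old delete-old-libs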
.if ${MK_GCC} == no
OLD_FILES+=usr/bin/c++filt
OLD_FILES+=usr/bin/g++
OLD_FILES+=usr/bin/gcc
OLD_FILES+=usr/bin/gcov
OLD_FILES+=usr/bin/gcpp
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/include/gcc/4.2/__wmmintrin_aes.h
OLD_FILES+=usr/include/gcc/4.2/__wmmintrin_pclmul.h
OLD_FILES+=usr/include/gcc/4.2/ammintrin.h
OLD_FILES+=usr/include/gcc/4.2/emmintrin.h
OLD_FILES+=usr/include/gcc/4.2/mm3dnow.h
OLD_FILES+=usr/include/gcc/4.2/mm_malloc.h
OLD_FILES+=usr/include/gcc/4.2/mmintrin.h
OLD_FILES+=usr/include/gcc/4.2/pmmintrin.h
OLD_FILES+=usr/include/gcc/4.2/tmmintrin.h
OLD_FILES+=usr/include/gcc/4.2/wmmintrin.h
OLD_FILES+=usr/include/gcc/4.2/xmmintrin.h
.elif ${TARGET_ARCH} == "ia64"
OLD_FILES+=usr/include/gcc/4.2/ia64intrin.h
.elif ${TARGET_ARCH} == "arm"
OLD_FILES+=usr/include/gcc/4.2/mmintrin.h
.elif ${TARGET_ARCH} == "powerpc" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/include/gcc/4.2/altivec.h
OLD_FILES+=usr/include/gcc/4.2/ppc-asm.h
OLD_FILES+=usr/include/gcc/4.2/spe.h
.endif
OLD_FILES+=usr/libexec/cc1
OLD_FILES+=usr/libexec/cc1plus
OLD_FILES+=usr/share/info/cpp.info.gz
OLD_FILES+=usr/share/info/cppinternals.info.gz
OLD_FILES+=usr/share/info/gcc.info.gz
OLD_FILES+=usr/share/info/gccint.info.gz
OLD_FILES+=usr/share/man/man1/g++.1.gz
OLD_FILES+=usr/share/man/man1/gcc.1.gz
OLD_FILES+=usr/share/man/man1/gcov.1.gz
OLD_FILES+=usr/share/man/man1/gcpp.1.gz
.endif
.if ${MK_GCOV} == no
OLD_FILES+=usr/bin/gcov
OLD_FILES+=usr/share/man/man1/gcov.1.gz
.endif
.if ${MK_GDB} == no
OLD_FILES+=usr/bin/gdb
OLD_FILES+=usr/bin/gdbserver
OLD_FILES+=usr/bin/gdbtui
OLD_FILES+=usr/bin/kgdb
OLD_FILES+=usr/share/info/gdb.info.gz
OLD_FILES+=usr/share/info/gdbint.info.gz
OLD_FILES+=usr/share/info/stabs.info.gz
OLD_FILES+=usr/share/man/man1/gdb.1.gz
OLD_FILES+=usr/share/man/man1/gdbserver.1.gz
OLD_FILES+=usr/share/man/man1/kgdb.1.gz
.endif
.if ${MK_GPIB} == no
OLD_FILES+=usr/include/dev/ieee488/ibfoo_int.h
OLD_FILES+=usr/include/dev/ieee488/tnt4882.h
OLD_FILES+=usr/include/dev/ieee488/ugpib.h
OLD_FILES+=usr/include/dev/ieee488/upd7210.h
OLD_DIRS+=usr/include/dev/ieee488
OLD_FILES+=usr/include/gpib/gpib.h
OLD_DIRS+=usr/include/gpib
OLD_FILES+=usr/lib/libgpib.a
OLD_FILES+=usr/lib/libgpib.so
OLD_LIBS+=usr/lib/libgpib.so.3
OLD_FILES+=usr/lib/libgpib_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libgpib.a
OLD_FILES+=usr/lib32/libgpib.so
OLD_LIBS+=usr/lib32/libgpib.so.3
OLD_FILES+=usr/lib32/libgpib_p.a
.endif
.endif
.if ${MK_GPIO} == no
OLD_FILES+=usr/sbin/gpioctl
OLD_FILES+=usr/share/man/man8/gpioctl.8.gz
.endif
# Also includes vgrind(1), its formatter vfontedpr, and the vgrindefs files
.if ${MK_GROFF} == no
OLD_FILES+=usr/bin/addftinfo
OLD_FILES+=usr/bin/afmtodit
OLD_FILES+=usr/bin/eqn
OLD_FILES+=usr/bin/grn
OLD_FILES+=usr/bin/grodvi
OLD_FILES+=usr/bin/groff
OLD_FILES+=usr/bin/grog
OLD_FILES+=usr/bin/grolbp
OLD_FILES+=usr/bin/grolj4
OLD_FILES+=usr/bin/grops
OLD_FILES+=usr/bin/grotty
OLD_FILES+=usr/bin/hpftodit
OLD_FILES+=usr/bin/indxbib
OLD_FILES+=usr/bin/lkbib
OLD_FILES+=usr/bin/lookbib
OLD_FILES+=usr/bin/mmroff
OLD_FILES+=usr/bin/neqn
OLD_FILES+=usr/bin/nroff
OLD_FILES+=usr/bin/pfbtops
OLD_FILES+=usr/bin/pic
OLD_FILES+=usr/bin/post-grohtml
OLD_FILES+=usr/bin/pre-grohtml
OLD_FILES+=usr/bin/psroff
OLD_FILES+=usr/bin/refer
OLD_FILES+=usr/bin/soelim
OLD_FILES+=usr/bin/tbl
OLD_FILES+=usr/bin/tfmtodit
OLD_FILES+=usr/bin/troff
OLD_FILES+=usr/bin/vgrind
OLD_FILES+=usr/libexec/vfontedpr
OLD_FILES+=usr/share/groff_font/devX100-12/CB
OLD_FILES+=usr/share/groff_font/devX100-12/CBI
OLD_FILES+=usr/share/groff_font/devX100-12/CI
OLD_FILES+=usr/share/groff_font/devX100-12/CR
OLD_FILES+=usr/share/groff_font/devX100-12/DESC
OLD_FILES+=usr/share/groff_font/devX100-12/HB
OLD_FILES+=usr/share/groff_font/devX100-12/HBI
OLD_FILES+=usr/share/groff_font/devX100-12/HI
OLD_FILES+=usr/share/groff_font/devX100-12/HR
OLD_FILES+=usr/share/groff_font/devX100-12/NB
OLD_FILES+=usr/share/groff_font/devX100-12/NBI
OLD_FILES+=usr/share/groff_font/devX100-12/NI
OLD_FILES+=usr/share/groff_font/devX100-12/NR
OLD_FILES+=usr/share/groff_font/devX100-12/S
OLD_FILES+=usr/share/groff_font/devX100-12/TB
OLD_FILES+=usr/share/groff_font/devX100-12/TBI
OLD_FILES+=usr/share/groff_font/devX100-12/TI
OLD_FILES+=usr/share/groff_font/devX100-12/TR
OLD_DIRS+=usr/share/groff_font/devX100-12
OLD_FILES+=usr/share/groff_font/devX100/CB
OLD_FILES+=usr/share/groff_font/devX100/CBI
OLD_FILES+=usr/share/groff_font/devX100/CI
OLD_FILES+=usr/share/groff_font/devX100/CR
OLD_FILES+=usr/share/groff_font/devX100/DESC
OLD_FILES+=usr/share/groff_font/devX100/HB
OLD_FILES+=usr/share/groff_font/devX100/HBI
OLD_FILES+=usr/share/groff_font/devX100/HI
OLD_FILES+=usr/share/groff_font/devX100/HR
OLD_FILES+=usr/share/groff_font/devX100/NB
OLD_FILES+=usr/share/groff_font/devX100/NBI
OLD_FILES+=usr/share/groff_font/devX100/NI
OLD_FILES+=usr/share/groff_font/devX100/NR
OLD_FILES+=usr/share/groff_font/devX100/S
OLD_FILES+=usr/share/groff_font/devX100/TB
OLD_FILES+=usr/share/groff_font/devX100/TBI
OLD_FILES+=usr/share/groff_font/devX100/TI
OLD_FILES+=usr/share/groff_font/devX100/TR
OLD_DIRS+=usr/share/groff_font/devX100
OLD_FILES+=usr/share/groff_font/devX75-12/CB
OLD_FILES+=usr/share/groff_font/devX75-12/CBI
OLD_FILES+=usr/share/groff_font/devX75-12/CI
OLD_FILES+=usr/share/groff_font/devX75-12/CR
OLD_FILES+=usr/share/groff_font/devX75-12/DESC
OLD_FILES+=usr/share/groff_font/devX75-12/HB
OLD_FILES+=usr/share/groff_font/devX75-12/HBI
OLD_FILES+=usr/share/groff_font/devX75-12/HI
OLD_FILES+=usr/share/groff_font/devX75-12/HR
OLD_FILES+=usr/share/groff_font/devX75-12/NB
OLD_FILES+=usr/share/groff_font/devX75-12/NBI
OLD_FILES+=usr/share/groff_font/devX75-12/NI
OLD_FILES+=usr/share/groff_font/devX75-12/NR
OLD_FILES+=usr/share/groff_font/devX75-12/S
OLD_FILES+=usr/share/groff_font/devX75-12/TB
OLD_FILES+=usr/share/groff_font/devX75-12/TBI
OLD_FILES+=usr/share/groff_font/devX75-12/TI
OLD_FILES+=usr/share/groff_font/devX75-12/TR
OLD_DIRS+=usr/share/groff_font/devX75-12
OLD_FILES+=usr/share/groff_font/devX75/CB
OLD_FILES+=usr/share/groff_font/devX75/CBI
OLD_FILES+=usr/share/groff_font/devX75/CI
OLD_FILES+=usr/share/groff_font/devX75/CR
OLD_FILES+=usr/share/groff_font/devX75/DESC
OLD_FILES+=usr/share/groff_font/devX75/HB
OLD_FILES+=usr/share/groff_font/devX75/HBI
OLD_FILES+=usr/share/groff_font/devX75/HI
OLD_FILES+=usr/share/groff_font/devX75/HR
OLD_FILES+=usr/share/groff_font/devX75/NB
OLD_FILES+=usr/share/groff_font/devX75/NBI
OLD_FILES+=usr/share/groff_font/devX75/NI
OLD_FILES+=usr/share/groff_font/devX75/NR
OLD_FILES+=usr/share/groff_font/devX75/S
OLD_FILES+=usr/share/groff_font/devX75/TB
OLD_FILES+=usr/share/groff_font/devX75/TBI
OLD_FILES+=usr/share/groff_font/devX75/TI
OLD_FILES+=usr/share/groff_font/devX75/TR
OLD_DIRS+=usr/share/groff_font/devX75
OLD_FILES+=usr/share/groff_font/devascii/B
OLD_FILES+=usr/share/groff_font/devascii/BI
OLD_FILES+=usr/share/groff_font/devascii/CW
OLD_FILES+=usr/share/groff_font/devascii/DESC
OLD_FILES+=usr/share/groff_font/devascii/I
OLD_FILES+=usr/share/groff_font/devascii/L
OLD_FILES+=usr/share/groff_font/devascii/R
OLD_FILES+=usr/share/groff_font/devascii/S
OLD_DIRS+=usr/share/groff_font/devascii
OLD_FILES+=usr/share/groff_font/devcp1047/B
OLD_FILES+=usr/share/groff_font/devcp1047/BI
OLD_FILES+=usr/share/groff_font/devcp1047/CW
OLD_FILES+=usr/share/groff_font/devcp1047/DESC
OLD_FILES+=usr/share/groff_font/devcp1047/I
OLD_FILES+=usr/share/groff_font/devcp1047/L
OLD_FILES+=usr/share/groff_font/devcp1047/R
OLD_FILES+=usr/share/groff_font/devcp1047/S
OLD_DIRS+=usr/share/groff_font/devcp1047
OLD_FILES+=usr/share/groff_font/devdvi/CW
OLD_FILES+=usr/share/groff_font/devdvi/CWEC
OLD_FILES+=usr/share/groff_font/devdvi/CWI
OLD_FILES+=usr/share/groff_font/devdvi/CWIEC
OLD_FILES+=usr/share/groff_font/devdvi/CWITC
OLD_FILES+=usr/share/groff_font/devdvi/CWTC
OLD_FILES+=usr/share/groff_font/devdvi/CompileFonts
OLD_FILES+=usr/share/groff_font/devdvi/DESC
OLD_FILES+=usr/share/groff_font/devdvi/EX
OLD_FILES+=usr/share/groff_font/devdvi/HB
OLD_FILES+=usr/share/groff_font/devdvi/HBEC
OLD_FILES+=usr/share/groff_font/devdvi/HBI
OLD_FILES+=usr/share/groff_font/devdvi/HBIEC
OLD_FILES+=usr/share/groff_font/devdvi/HBITC
OLD_FILES+=usr/share/groff_font/devdvi/HBTC
OLD_FILES+=usr/share/groff_font/devdvi/HI
OLD_FILES+=usr/share/groff_font/devdvi/HIEC
OLD_FILES+=usr/share/groff_font/devdvi/HITC
OLD_FILES+=usr/share/groff_font/devdvi/HR
OLD_FILES+=usr/share/groff_font/devdvi/HREC
OLD_FILES+=usr/share/groff_font/devdvi/HRTC
OLD_FILES+=usr/share/groff_font/devdvi/MI
OLD_FILES+=usr/share/groff_font/devdvi/Makefile
OLD_FILES+=usr/share/groff_font/devdvi/S
OLD_FILES+=usr/share/groff_font/devdvi/SA
OLD_FILES+=usr/share/groff_font/devdvi/SB
OLD_FILES+=usr/share/groff_font/devdvi/SC
OLD_FILES+=usr/share/groff_font/devdvi/TB
OLD_FILES+=usr/share/groff_font/devdvi/TBEC
OLD_FILES+=usr/share/groff_font/devdvi/TBI
OLD_FILES+=usr/share/groff_font/devdvi/TBIEC
OLD_FILES+=usr/share/groff_font/devdvi/TBITC
OLD_FILES+=usr/share/groff_font/devdvi/TBTC
OLD_FILES+=usr/share/groff_font/devdvi/TI
OLD_FILES+=usr/share/groff_font/devdvi/TIEC
OLD_FILES+=usr/share/groff_font/devdvi/TITC
OLD_FILES+=usr/share/groff_font/devdvi/TR
OLD_FILES+=usr/share/groff_font/devdvi/TREC
OLD_FILES+=usr/share/groff_font/devdvi/TRTC
OLD_FILES+=usr/share/groff_font/devdvi/ec.map
OLD_FILES+=usr/share/groff_font/devdvi/msam.map
OLD_FILES+=usr/share/groff_font/devdvi/msbm.map
OLD_FILES+=usr/share/groff_font/devdvi/tc.map
OLD_FILES+=usr/share/groff_font/devdvi/texb.map
OLD_FILES+=usr/share/groff_font/devdvi/texex.map
OLD_FILES+=usr/share/groff_font/devdvi/texi.map
OLD_FILES+=usr/share/groff_font/devdvi/texmi.map
OLD_FILES+=usr/share/groff_font/devdvi/texr.map
OLD_FILES+=usr/share/groff_font/devdvi/texsy.map
OLD_FILES+=usr/share/groff_font/devdvi/textex.map
OLD_FILES+=usr/share/groff_font/devdvi/textt.map
OLD_DIRS+=usr/share/groff_font/devdvi
OLD_FILES+=usr/share/groff_font/devhtml/B
OLD_FILES+=usr/share/groff_font/devhtml/BI
OLD_FILES+=usr/share/groff_font/devhtml/CB
OLD_FILES+=usr/share/groff_font/devhtml/CBI
OLD_FILES+=usr/share/groff_font/devhtml/CI
OLD_FILES+=usr/share/groff_font/devhtml/CR
OLD_FILES+=usr/share/groff_font/devhtml/DESC
OLD_FILES+=usr/share/groff_font/devhtml/I
OLD_FILES+=usr/share/groff_font/devhtml/R
OLD_FILES+=usr/share/groff_font/devhtml/S
OLD_DIRS+=usr/share/groff_font/devhtml
OLD_FILES+=usr/share/groff_font/devkoi8-r/B
OLD_FILES+=usr/share/groff_font/devkoi8-r/BI
OLD_FILES+=usr/share/groff_font/devkoi8-r/CW
OLD_FILES+=usr/share/groff_font/devkoi8-r/DESC
OLD_FILES+=usr/share/groff_font/devkoi8-r/I
OLD_FILES+=usr/share/groff_font/devkoi8-r/L
OLD_FILES+=usr/share/groff_font/devkoi8-r/R
OLD_FILES+=usr/share/groff_font/devkoi8-r/S
OLD_DIRS+=usr/share/groff_font/devkoi8-r
OLD_FILES+=usr/share/groff_font/devlatin1/B
OLD_FILES+=usr/share/groff_font/devlatin1/BI
OLD_FILES+=usr/share/groff_font/devlatin1/CW
OLD_FILES+=usr/share/groff_font/devlatin1/DESC
OLD_FILES+=usr/share/groff_font/devlatin1/I
OLD_FILES+=usr/share/groff_font/devlatin1/L
OLD_FILES+=usr/share/groff_font/devlatin1/R
OLD_FILES+=usr/share/groff_font/devlatin1/S
OLD_DIRS+=usr/share/groff_font/devlatin1
OLD_FILES+=usr/share/groff_font/devlbp/CB
OLD_FILES+=usr/share/groff_font/devlbp/CI
OLD_FILES+=usr/share/groff_font/devlbp/CR
OLD_FILES+=usr/share/groff_font/devlbp/DESC
OLD_FILES+=usr/share/groff_font/devlbp/EB
OLD_FILES+=usr/share/groff_font/devlbp/EI
OLD_FILES+=usr/share/groff_font/devlbp/ER
OLD_FILES+=usr/share/groff_font/devlbp/HB
OLD_FILES+=usr/share/groff_font/devlbp/HBI
OLD_FILES+=usr/share/groff_font/devlbp/HI
OLD_FILES+=usr/share/groff_font/devlbp/HNB
OLD_FILES+=usr/share/groff_font/devlbp/HNBI
OLD_FILES+=usr/share/groff_font/devlbp/HNI
OLD_FILES+=usr/share/groff_font/devlbp/HNR
OLD_FILES+=usr/share/groff_font/devlbp/HR
OLD_FILES+=usr/share/groff_font/devlbp/TB
OLD_FILES+=usr/share/groff_font/devlbp/TBI
OLD_FILES+=usr/share/groff_font/devlbp/TI
OLD_FILES+=usr/share/groff_font/devlbp/TR
OLD_DIRS+=usr/share/groff_font/devlbp
OLD_FILES+=usr/share/groff_font/devlj4/AB
OLD_FILES+=usr/share/groff_font/devlj4/ABI
OLD_FILES+=usr/share/groff_font/devlj4/AI
OLD_FILES+=usr/share/groff_font/devlj4/ALBB
OLD_FILES+=usr/share/groff_font/devlj4/ALBR
OLD_FILES+=usr/share/groff_font/devlj4/AOB
OLD_FILES+=usr/share/groff_font/devlj4/AOI
OLD_FILES+=usr/share/groff_font/devlj4/AOR
OLD_FILES+=usr/share/groff_font/devlj4/AR
OLD_FILES+=usr/share/groff_font/devlj4/CB
OLD_FILES+=usr/share/groff_font/devlj4/CBI
OLD_FILES+=usr/share/groff_font/devlj4/CI
OLD_FILES+=usr/share/groff_font/devlj4/CLARENDON
OLD_FILES+=usr/share/groff_font/devlj4/CORONET
OLD_FILES+=usr/share/groff_font/devlj4/CR
OLD_FILES+=usr/share/groff_font/devlj4/DESC
OLD_FILES+=usr/share/groff_font/devlj4/GB
OLD_FILES+=usr/share/groff_font/devlj4/GBI
OLD_FILES+=usr/share/groff_font/devlj4/GI
OLD_FILES+=usr/share/groff_font/devlj4/GR
OLD_FILES+=usr/share/groff_font/devlj4/LGB
OLD_FILES+=usr/share/groff_font/devlj4/LGI
OLD_FILES+=usr/share/groff_font/devlj4/LGR
OLD_FILES+=usr/share/groff_font/devlj4/MARIGOLD
OLD_FILES+=usr/share/groff_font/devlj4/OB
OLD_FILES+=usr/share/groff_font/devlj4/OBI
OLD_FILES+=usr/share/groff_font/devlj4/OI
OLD_FILES+=usr/share/groff_font/devlj4/OR
OLD_FILES+=usr/share/groff_font/devlj4/S
OLD_FILES+=usr/share/groff_font/devlj4/SYMBOL
OLD_FILES+=usr/share/groff_font/devlj4/TB
OLD_FILES+=usr/share/groff_font/devlj4/TBI
OLD_FILES+=usr/share/groff_font/devlj4/TI
OLD_FILES+=usr/share/groff_font/devlj4/TNRB
OLD_FILES+=usr/share/groff_font/devlj4/TNRBI
OLD_FILES+=usr/share/groff_font/devlj4/TNRI
OLD_FILES+=usr/share/groff_font/devlj4/TNRR
OLD_FILES+=usr/share/groff_font/devlj4/TR
OLD_FILES+=usr/share/groff_font/devlj4/UB
OLD_FILES+=usr/share/groff_font/devlj4/UBI
OLD_FILES+=usr/share/groff_font/devlj4/UCB
OLD_FILES+=usr/share/groff_font/devlj4/UCBI
OLD_FILES+=usr/share/groff_font/devlj4/UCI
OLD_FILES+=usr/share/groff_font/devlj4/UCR
OLD_FILES+=usr/share/groff_font/devlj4/UI
OLD_FILES+=usr/share/groff_font/devlj4/UR
OLD_FILES+=usr/share/groff_font/devlj4/WINGDINGS
OLD_DIRS+=usr/share/groff_font/devlj4
OLD_FILES+=usr/share/groff_font/devps/AB
OLD_FILES+=usr/share/groff_font/devps/ABI
OLD_FILES+=usr/share/groff_font/devps/AI
OLD_FILES+=usr/share/groff_font/devps/AR
OLD_FILES+=usr/share/groff_font/devps/BMB
OLD_FILES+=usr/share/groff_font/devps/BMBI
OLD_FILES+=usr/share/groff_font/devps/BMI
OLD_FILES+=usr/share/groff_font/devps/BMR
OLD_FILES+=usr/share/groff_font/devps/CB
OLD_FILES+=usr/share/groff_font/devps/CBI
OLD_FILES+=usr/share/groff_font/devps/CI
OLD_FILES+=usr/share/groff_font/devps/CR
OLD_FILES+=usr/share/groff_font/devps/DESC
OLD_FILES+=usr/share/groff_font/devps/EURO
OLD_FILES+=usr/share/groff_font/devps/HB
OLD_FILES+=usr/share/groff_font/devps/HBI
OLD_FILES+=usr/share/groff_font/devps/HI
OLD_FILES+=usr/share/groff_font/devps/HNB
OLD_FILES+=usr/share/groff_font/devps/HNBI
OLD_FILES+=usr/share/groff_font/devps/HNI
OLD_FILES+=usr/share/groff_font/devps/HNR
OLD_FILES+=usr/share/groff_font/devps/HR
OLD_FILES+=usr/share/groff_font/devps/Makefile
OLD_FILES+=usr/share/groff_font/devps/NB
OLD_FILES+=usr/share/groff_font/devps/NBI
OLD_FILES+=usr/share/groff_font/devps/NI
OLD_FILES+=usr/share/groff_font/devps/NR
OLD_FILES+=usr/share/groff_font/devps/PB
OLD_FILES+=usr/share/groff_font/devps/PBI
OLD_FILES+=usr/share/groff_font/devps/PI
OLD_FILES+=usr/share/groff_font/devps/PR
OLD_FILES+=usr/share/groff_font/devps/S
OLD_FILES+=usr/share/groff_font/devps/SS
OLD_FILES+=usr/share/groff_font/devps/TB
OLD_FILES+=usr/share/groff_font/devps/TBI
OLD_FILES+=usr/share/groff_font/devps/TI
OLD_FILES+=usr/share/groff_font/devps/TR
OLD_FILES+=usr/share/groff_font/devps/ZCMI
OLD_FILES+=usr/share/groff_font/devps/ZD
OLD_FILES+=usr/share/groff_font/devps/ZDR
OLD_FILES+=usr/share/groff_font/devps/afmname
OLD_FILES+=usr/share/groff_font/devps/dingbats.map
OLD_FILES+=usr/share/groff_font/devps/dingbats.rmap
OLD_FILES+=usr/share/groff_font/devps/download
OLD_FILES+=usr/share/groff_font/devps/freeeuro.pfa
OLD_FILES+=usr/share/groff_font/devps/lgreekmap
OLD_FILES+=usr/share/groff_font/devps/prologue
OLD_FILES+=usr/share/groff_font/devps/symbol.sed
OLD_FILES+=usr/share/groff_font/devps/symbolchars
OLD_FILES+=usr/share/groff_font/devps/symbolsl.afm
OLD_FILES+=usr/share/groff_font/devps/symbolsl.pfa
OLD_FILES+=usr/share/groff_font/devps/text.enc
OLD_FILES+=usr/share/groff_font/devps/textmap
OLD_FILES+=usr/share/groff_font/devps/zapfdr.pfa
OLD_DIRS+=usr/share/groff_font/devps
OLD_FILES+=usr/share/groff_font/devutf8/B
OLD_FILES+=usr/share/groff_font/devutf8/BI
OLD_FILES+=usr/share/groff_font/devutf8/CW
OLD_FILES+=usr/share/groff_font/devutf8/DESC
OLD_FILES+=usr/share/groff_font/devutf8/I
OLD_FILES+=usr/share/groff_font/devutf8/L
OLD_FILES+=usr/share/groff_font/devutf8/R
OLD_FILES+=usr/share/groff_font/devutf8/S
OLD_DIRS+=usr/share/groff_font/devutf8
OLD_DIRS+=usr/share/groff_font
OLD_FILES+=usr/share/info/groff.info.gz
OLD_FILES+=usr/share/man/man1/addftinfo.1.gz
OLD_FILES+=usr/share/man/man1/afmtodit.1.gz
OLD_FILES+=usr/share/man/man1/eqn.1.gz
OLD_FILES+=usr/share/man/man1/grn.1.gz
OLD_FILES+=usr/share/man/man1/grodvi.1.gz
OLD_FILES+=usr/share/man/man1/groff.1.gz
OLD_FILES+=usr/share/man/man1/grog.1.gz
OLD_FILES+=usr/share/man/man1/grolbp.1.gz
OLD_FILES+=usr/share/man/man1/grolj4.1.gz
OLD_FILES+=usr/share/man/man1/grops.1.gz
OLD_FILES+=usr/share/man/man1/grotty.1.gz
OLD_FILES+=usr/share/man/man1/hpftodit.1.gz
OLD_FILES+=usr/share/man/man1/indxbib.1.gz
OLD_FILES+=usr/share/man/man1/lkbib.1.gz
OLD_FILES+=usr/share/man/man1/lookbib.1.gz
OLD_FILES+=usr/share/man/man1/mmroff.1.gz
OLD_FILES+=usr/share/man/man1/neqn.1.gz
OLD_FILES+=usr/share/man/man1/nroff.1.gz
OLD_FILES+=usr/share/man/man1/pfbtops.1.gz
OLD_FILES+=usr/share/man/man1/pic.1.gz
OLD_FILES+=usr/share/man/man1/psroff.1.gz
OLD_FILES+=usr/share/man/man1/refer.1.gz
OLD_FILES+=usr/share/man/man1/soelim.1.gz
OLD_FILES+=usr/share/man/man1/tbl.1.gz
OLD_FILES+=usr/share/man/man1/tfmtodit.1.gz
OLD_FILES+=usr/share/man/man1/troff.1.gz
OLD_FILES+=usr/share/man/man1/vgrind.1.gz
OLD_FILES+=usr/share/man/man5/groff_font.5.gz
OLD_FILES+=usr/share/man/man5/groff_out.5.gz
OLD_FILES+=usr/share/man/man5/groff_tmac.5.gz
OLD_FILES+=usr/share/man/man5/lj4_font.5.gz
OLD_FILES+=usr/share/man/man5/tmac.5.gz
OLD_FILES+=usr/share/man/man5/vgrindefs.5.gz
OLD_FILES+=usr/share/man/man7/ditroff.7.gz
OLD_FILES+=usr/share/man/man7/groff.7.gz
OLD_FILES+=usr/share/man/man7/groff_char.7.gz
OLD_FILES+=usr/share/man/man7/groff_diff.7.gz
OLD_FILES+=usr/share/man/man7/groff_man.7.gz
OLD_FILES+=usr/share/man/man7/groff_mdoc.7.gz
OLD_FILES+=usr/share/man/man7/groff_me.7.gz
OLD_FILES+=usr/share/man/man7/groff_mm.7.gz
OLD_FILES+=usr/share/man/man7/groff_mmse.7.gz
OLD_FILES+=usr/share/man/man7/groff_ms.7.gz
OLD_FILES+=usr/share/man/man7/groff_trace.7.gz
OLD_FILES+=usr/share/man/man7/groff_www.7.gz
OLD_FILES+=usr/share/man/man7/man.7.gz
OLD_FILES+=usr/share/man/man7/mdoc.7.gz
OLD_FILES+=usr/share/man/man7/mdoc.samples.7.gz
OLD_FILES+=usr/share/man/man7/me.7.gz
OLD_FILES+=usr/share/man/man7/mm.7.gz
OLD_FILES+=usr/share/man/man7/mmse.7.gz
OLD_FILES+=usr/share/man/man7/ms.7.gz
OLD_FILES+=usr/share/man/man7/orig_me.7.gz
OLD_FILES+=usr/share/man/man7/roff.7.gz
OLD_FILES+=usr/share/me/acm.me
OLD_FILES+=usr/share/me/chars.me
OLD_FILES+=usr/share/me/deltext.me
OLD_FILES+=usr/share/me/eqn.me
OLD_FILES+=usr/share/me/float.me
OLD_FILES+=usr/share/me/footnote.me
OLD_FILES+=usr/share/me/index.me
OLD_FILES+=usr/share/me/letterhead.me
OLD_FILES+=usr/share/me/local.me
OLD_FILES+=usr/share/me/null.me
OLD_FILES+=usr/share/me/refer.me
OLD_FILES+=usr/share/me/revisions
OLD_FILES+=usr/share/me/sh.me
OLD_FILES+=usr/share/me/tbl.me
OLD_FILES+=usr/share/me/thesis.me
OLD_DIRS+=usr/share/me
OLD_FILES+=usr/share/misc/vgrindefs
OLD_FILES+=usr/share/misc/vgrindefs.db
OLD_FILES+=usr/share/tmac/X.tmac
OLD_FILES+=usr/share/tmac/Xps.tmac
OLD_FILES+=usr/share/tmac/a4.tmac
OLD_FILES+=usr/share/tmac/an-old.tmac
OLD_FILES+=usr/share/tmac/an.tmac
OLD_FILES+=usr/share/tmac/andoc.tmac
OLD_FILES+=usr/share/tmac/composite.tmac
OLD_FILES+=usr/share/tmac/cp1047.tmac
OLD_FILES+=usr/share/tmac/devtag.tmac
OLD_FILES+=usr/share/tmac/doc.tmac
OLD_FILES+=usr/share/tmac/dvi.tmac
OLD_FILES+=usr/share/tmac/e.tmac
OLD_FILES+=usr/share/tmac/ec.tmac
OLD_FILES+=usr/share/tmac/eqnrc
OLD_FILES+=usr/share/tmac/europs.tmac
OLD_FILES+=usr/share/tmac/html-end.tmac
OLD_FILES+=usr/share/tmac/html.tmac
OLD_FILES+=usr/share/tmac/hyphen.ru
OLD_FILES+=usr/share/tmac/hyphen.us
OLD_FILES+=usr/share/tmac/hyphenex.us
OLD_FILES+=usr/share/tmac/koi8-r.tmac
OLD_FILES+=usr/share/tmac/latin1.tmac
OLD_FILES+=usr/share/tmac/latin2.tmac
OLD_FILES+=usr/share/tmac/latin9.tmac
OLD_FILES+=usr/share/tmac/lbp.tmac
OLD_FILES+=usr/share/tmac/lj4.tmac
OLD_FILES+=usr/share/tmac/m.tmac
OLD_FILES+=usr/share/tmac/man.local
OLD_FILES+=usr/share/tmac/man.tmac
OLD_FILES+=usr/share/tmac/mandoc.tmac
OLD_FILES+=usr/share/tmac/mdoc.local
OLD_FILES+=usr/share/tmac/mdoc.tmac
OLD_FILES+=usr/share/tmac/mdoc/doc-common
OLD_FILES+=usr/share/tmac/mdoc/doc-ditroff
OLD_FILES+=usr/share/tmac/mdoc/doc-nroff
OLD_FILES+=usr/share/tmac/mdoc/doc-syms
OLD_FILES+=usr/share/tmac/mdoc/fr.ISO8859-1
OLD_FILES+=usr/share/tmac/mdoc/ru.KOI8-R
OLD_DIRS+=usr/share/tmac/mdoc
OLD_FILES+=usr/share/tmac/me.tmac
OLD_FILES+=usr/share/tmac/mm/0.MT
OLD_FILES+=usr/share/tmac/mm/4.MT
OLD_FILES+=usr/share/tmac/mm/5.MT
OLD_FILES+=usr/share/tmac/mm/locale
OLD_FILES+=usr/share/tmac/mm/mm.tmac
OLD_FILES+=usr/share/tmac/mm/mmse.tmac
OLD_FILES+=usr/share/tmac/mm/ms.cov
OLD_FILES+=usr/share/tmac/mm/se_locale
OLD_FILES+=usr/share/tmac/mm/se_ms.cov
OLD_DIRS+=usr/share/tmac/mm
OLD_FILES+=usr/share/tmac/ms.tmac
OLD_FILES+=usr/share/tmac/mse.tmac
OLD_FILES+=usr/share/tmac/papersize.tmac
OLD_FILES+=usr/share/tmac/pic.tmac
OLD_FILES+=usr/share/tmac/ps.tmac
OLD_FILES+=usr/share/tmac/psatk.tmac
OLD_FILES+=usr/share/tmac/psold.tmac
OLD_FILES+=usr/share/tmac/pspic.tmac
OLD_FILES+=usr/share/tmac/s.tmac
OLD_FILES+=usr/share/tmac/safer.tmac
OLD_FILES+=usr/share/tmac/tmac.orig_me
OLD_FILES+=usr/share/tmac/tmac.vgrind
OLD_FILES+=usr/share/tmac/trace.tmac
OLD_FILES+=usr/share/tmac/troffrc
OLD_FILES+=usr/share/tmac/troffrc-end
OLD_FILES+=usr/share/tmac/tty-char.tmac
OLD_FILES+=usr/share/tmac/tty.tmac
OLD_FILES+=usr/share/tmac/unicode.tmac
OLD_FILES+=usr/share/tmac/www.tmac
OLD_DIRS+=usr/share/tmac
.endif
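# Note: the OLD_FILES/OLD_LIBS/OLD_DIRS entries in the option blocks below
# take effect only when the corresponding MK_* knob is "no" (see src.conf(5)).
# As a sketch of typical usage after installworld, assuming sources live in
# /usr/src, the entries are consumed by the check-old/delete-old targets:
# cd /usr/src
# make check-old
# make delete-old delete-old-libs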
.if ${MK_GSSAPI} == no
OLD_FILES+=usr/lib/libgssapi.a
OLD_FILES+=usr/lib/libgssapi.so
OLD_LIBS+=usr/lib/libgssapi.so.10
OLD_FILES+=usr/lib/libgssapi_p.a
OLD_FILES+=usr/lib/librpcsec_gss.a
OLD_FILES+=usr/lib/librpcsec_gss.so
OLD_LIBS+=usr/lib/librpcsec_gss.so.1
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libgssapi.a
OLD_FILES+=usr/lib32/libgssapi.so
OLD_LIBS+=usr/lib32/libgssapi.so.10
OLD_FILES+=usr/lib32/libgssapi_p.a
OLD_FILES+=usr/lib32/librpcsec_gss.a
OLD_FILES+=usr/lib32/librpcsec_gss.so
OLD_LIBS+=usr/lib32/librpcsec_gss.so.1
.endif
OLD_FILES+=usr/sbin/gssd
OLD_FILES+=usr/share/man/man3/gss_accept_sec_context.3.gz
OLD_FILES+=usr/share/man/man3/gss_acquire_cred.3.gz
OLD_FILES+=usr/share/man/man3/gss_add_cred.3.gz
OLD_FILES+=usr/share/man/man3/gss_add_oid_set_member.3.gz
OLD_FILES+=usr/share/man/man3/gss_canonicalize_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_compare_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_context_time.3.gz
OLD_FILES+=usr/share/man/man3/gss_create_empty_oid_set.3.gz
OLD_FILES+=usr/share/man/man3/gss_delete_sec_context.3.gz
OLD_FILES+=usr/share/man/man3/gss_display_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_display_status.3.gz
OLD_FILES+=usr/share/man/man3/gss_duplicate_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_export_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_export_sec_context.3.gz
OLD_FILES+=usr/share/man/man3/gss_get_mic.3.gz
OLD_FILES+=usr/share/man/man3/gss_import_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_import_sec_context.3.gz
OLD_FILES+=usr/share/man/man3/gss_indicate_mechs.3.gz
OLD_FILES+=usr/share/man/man3/gss_init_sec_context.3.gz
OLD_FILES+=usr/share/man/man3/gss_inquire_context.3.gz
OLD_FILES+=usr/share/man/man3/gss_inquire_cred.3.gz
OLD_FILES+=usr/share/man/man3/gss_inquire_cred_by_mech.3.gz
OLD_FILES+=usr/share/man/man3/gss_inquire_mechs_for_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_inquire_names_for_mech.3.gz
OLD_FILES+=usr/share/man/man3/gss_process_context_token.3.gz
OLD_FILES+=usr/share/man/man3/gss_release_buffer.3.gz
OLD_FILES+=usr/share/man/man3/gss_release_cred.3.gz
OLD_FILES+=usr/share/man/man3/gss_release_name.3.gz
OLD_FILES+=usr/share/man/man3/gss_release_oid_set.3.gz
OLD_FILES+=usr/share/man/man3/gss_seal.3.gz
OLD_FILES+=usr/share/man/man3/gss_sign.3.gz
OLD_FILES+=usr/share/man/man3/gss_test_oid_set_member.3.gz
OLD_FILES+=usr/share/man/man3/gss_unseal.3.gz
OLD_FILES+=usr/share/man/man3/gss_unwrap.3.gz
OLD_FILES+=usr/share/man/man3/gss_verify.3.gz
OLD_FILES+=usr/share/man/man3/gss_verify_mic.3.gz
OLD_FILES+=usr/share/man/man3/gss_wrap.3.gz
OLD_FILES+=usr/share/man/man3/gss_wrap_size_limit.3.gz
OLD_FILES+=usr/share/man/man3/gssapi.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_get_error.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_get_mech_info.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_get_mechanisms.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_get_principal_name.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_get_versions.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_getcred.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_is_installed.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_max_data_length.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_mech_to_oid.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_oid_to_mech.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_qop_to_num.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_seccreate.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_set_callback.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_set_defaults.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_set_svc_name.3.gz
OLD_FILES+=usr/share/man/man3/rpc_gss_svc_max_data_length.3.gz
OLD_FILES+=usr/share/man/man3/rpcsec_gss.3.gz
OLD_FILES+=usr/share/man/man5/mech.5.gz
OLD_FILES+=usr/share/man/man5/qop.5.gz
OLD_FILES+=usr/share/man/man8/gssd.8.gz
.endif
.if ${MK_HESIOD} == no
OLD_FILES+=usr/bin/hesinfo
OLD_FILES+=usr/include/hesiod.h
OLD_FILES+=usr/share/man/man1/hesinfo.1.gz
OLD_FILES+=usr/share/man/man3/hesiod.3.gz
OLD_FILES+=usr/share/man/man5/hesiod.conf.5.gz
.endif
#.if ${MK_HTML} == no
# to be filled in
#.endif
.if ${MK_ICONV} == no
OLD_FILES+=usr/bin/iconv
OLD_FILES+=usr/bin/mkcsmapper
OLD_FILES+=usr/bin/mkesdb
OLD_FILES+=usr/include/_libiconv_compat.h
OLD_FILES+=usr/include/iconv.h
OLD_FILES+=usr/share/man/man1/iconv.1.gz
OLD_FILES+=usr/share/man/man1/mkcsmapper.1.gz
OLD_FILES+=usr/share/man/man1/mkesdb.1.gz
OLD_FILES+=usr/share/man/man3/__iconv.3.gz
OLD_FILES+=usr/share/man/man3/__iconv_free_list.3.gz
OLD_FILES+=usr/share/man/man3/__iconv_get_list.3.gz
OLD_FILES+=usr/share/man/man3/iconv.3.gz
OLD_FILES+=usr/share/man/man3/iconv_canonicalize.3.gz
OLD_FILES+=usr/share/man/man3/iconv_close.3.gz
OLD_FILES+=usr/share/man/man3/iconv_open.3.gz
OLD_FILES+=usr/share/man/man3/iconv_open_into.3.gz
OLD_FILES+=usr/share/man/man3/iconvctl.3.gz
OLD_FILES+=usr/share/man/man3/iconvlist.3.gz
.endif
.if ${MK_INET6} == no
OLD_FILES+=sbin/ping6
OLD_FILES+=sbin/rtsol
OLD_FILES+=usr/sbin/faithd
OLD_FILES+=usr/sbin/ip6addrctl
OLD_FILES+=usr/sbin/mld6query
OLD_FILES+=usr/sbin/ndp
OLD_FILES+=usr/sbin/rip6query
OLD_FILES+=usr/sbin/route6d
OLD_FILES+=usr/sbin/rrenumd
OLD_FILES+=usr/sbin/rtadvd
OLD_FILES+=usr/sbin/rtsold
OLD_FILES+=usr/sbin/traceroute6
OLD_FILES+=usr/share/man/man5/rrenumd.conf.5.gz
OLD_FILES+=usr/share/man/man5/rtadvd.conf.5.gz
OLD_FILES+=usr/share/man/man8/ip6addrctl.8.gz
OLD_FILES+=usr/share/man/man8/mld6query.8.gz
OLD_FILES+=usr/share/man/man8/ndp.8.gz
OLD_FILES+=usr/share/man/man8/ping6.8.gz
OLD_FILES+=usr/share/man/man8/rip6query.8.gz
OLD_FILES+=usr/share/man/man8/route6d.8.gz
OLD_FILES+=usr/share/man/man8/rrenumd.8.gz
OLD_FILES+=usr/share/man/man8/rtadvd.8.gz
OLD_FILES+=usr/share/man/man8/rtsol.8.gz
OLD_FILES+=usr/share/man/man8/rtsold.8.gz
OLD_FILES+=usr/share/man/man8/traceroute6.8.gz
.endif
.if ${MK_INET6_SUPPORT} == no
OLD_FILES+=rescue/ping6
.endif
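# To see how a given knob is currently set before trusting a block above or
# below (a sketch, assuming a stock /usr/src), the showconfig target prints
# the effective MK_* values:
# make -C /usr/src showconfig | grep MK_INET6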
#.if ${MK_INFO} == no
# to be filled in
#.endif
.if ${MK_IPFILTER} == no
OLD_FILES+=etc/periodic/security/510.ipfdenied
OLD_FILES+=etc/periodic/security/610.ipf6denied
OLD_FILES+=rescue/ipf
OLD_FILES+=sbin/ipf
OLD_FILES+=sbin/ipfs
OLD_FILES+=sbin/ipfstat
OLD_FILES+=sbin/ipftest
OLD_FILES+=sbin/ipmon
OLD_FILES+=sbin/ipnat
OLD_FILES+=sbin/ippool
OLD_FILES+=sbin/ipresend
OLD_FILES+=usr/include/netinet/ip_auth.h
OLD_FILES+=usr/include/netinet/ip_compat.h
OLD_FILES+=usr/include/netinet/ip_fil.h
OLD_FILES+=usr/include/netinet/ip_frag.h
OLD_FILES+=usr/include/netinet/ip_htable.h
OLD_FILES+=usr/include/netinet/ip_lookup.h
OLD_FILES+=usr/include/netinet/ip_nat.h
OLD_FILES+=usr/include/netinet/ip_pool.h
OLD_FILES+=usr/include/netinet/ip_proxy.h
OLD_FILES+=usr/include/netinet/ip_rules.h
OLD_FILES+=usr/include/netinet/ip_scan.h
OLD_FILES+=usr/include/netinet/ip_state.h
OLD_FILES+=usr/include/netinet/ip_sync.h
OLD_FILES+=usr/include/netinet/ipl.h
OLD_FILES+=usr/share/examples/ipfilter/README
OLD_FILES+=usr/share/examples/ipfilter/BASIC.NAT
OLD_FILES+=usr/share/examples/ipfilter/BASIC_1.FW
OLD_FILES+=usr/share/examples/ipfilter/BASIC_2.FW
OLD_FILES+=usr/share/examples/ipfilter/example.1
OLD_FILES+=usr/share/examples/ipfilter/example.2
OLD_FILES+=usr/share/examples/ipfilter/example.3
OLD_FILES+=usr/share/examples/ipfilter/example.4
OLD_FILES+=usr/share/examples/ipfilter/example.5
OLD_FILES+=usr/share/examples/ipfilter/example.6
OLD_FILES+=usr/share/examples/ipfilter/example.7
OLD_FILES+=usr/share/examples/ipfilter/example.8
OLD_FILES+=usr/share/examples/ipfilter/example.9
OLD_FILES+=usr/share/examples/ipfilter/example.10
OLD_FILES+=usr/share/examples/ipfilter/example.11
OLD_FILES+=usr/share/examples/ipfilter/example.12
OLD_FILES+=usr/share/examples/ipfilter/example.13
OLD_FILES+=usr/share/examples/ipfilter/example.sr
OLD_FILES+=usr/share/examples/ipfilter/firewall
OLD_FILES+=usr/share/examples/ipfilter/ftp-proxy
OLD_FILES+=usr/share/examples/ipfilter/ftppxy
OLD_FILES+=usr/share/examples/ipfilter/nat-setup
OLD_FILES+=usr/share/examples/ipfilter/nat.eg
OLD_FILES+=usr/share/examples/ipfilter/server
OLD_FILES+=usr/share/examples/ipfilter/tcpstate
OLD_FILES+=usr/share/examples/ipfilter/example.14
OLD_FILES+=usr/share/examples/ipfilter/firewall.1
OLD_FILES+=usr/share/examples/ipfilter/firewall.2
OLD_FILES+=usr/share/examples/ipfilter/ipf.conf.permissive
OLD_FILES+=usr/share/examples/ipfilter/ipf.conf.restrictive
OLD_FILES+=usr/share/examples/ipfilter/ipf.conf.sample
OLD_FILES+=usr/share/examples/ipfilter/ipnat.conf.sample
OLD_FILES+=usr/share/examples/ipfilter/ipf-howto.txt
OLD_FILES+=usr/share/examples/ipfilter/examples.txt
OLD_FILES+=usr/share/examples/ipfilter/rules.txt
OLD_FILES+=usr/share/examples/ipfilter/mkfilters
OLD_DIRS+=usr/share/examples/ipfilter
OLD_FILES+=usr/share/man/man1/ipftest.1.gz
OLD_FILES+=usr/share/man/man1/ipresend.1.gz
OLD_FILES+=usr/share/man/man4/ipf.4.gz
OLD_FILES+=usr/share/man/man4/ipl.4.gz
OLD_FILES+=usr/share/man/man4/ipfilter.4.gz
OLD_FILES+=usr/share/man/man4/ipnat.4.gz
OLD_FILES+=usr/share/man/man5/ipf.5.gz
OLD_FILES+=usr/share/man/man5/ipf.conf.5.gz
OLD_FILES+=usr/share/man/man5/ipf6.conf.5.gz
OLD_FILES+=usr/share/man/man5/ipnat.5.gz
OLD_FILES+=usr/share/man/man5/ipnat.conf.5.gz
OLD_FILES+=usr/share/man/man5/ippool.5.gz
OLD_FILES+=usr/share/man/man8/ipf.8.gz
OLD_FILES+=usr/share/man/man8/ipfs.8.gz
OLD_FILES+=usr/share/man/man8/ipfstat.8.gz
OLD_FILES+=usr/share/man/man8/ipmon.8.gz
OLD_FILES+=usr/share/man/man8/ipnat.8.gz
OLD_FILES+=usr/share/man/man8/ippool.8.gz
.endif
.if ${MK_IPFW} == no
OLD_FILES+=etc/periodic/security/500.ipfwdenied
OLD_FILES+=etc/periodic/security/550.ipfwlimit
OLD_FILES+=sbin/ipfw
OLD_FILES+=sbin/natd
OLD_FILES+=usr/sbin/ipfwpcap
OLD_FILES+=usr/share/man/man8/ipfw.8.gz
OLD_FILES+=usr/share/man/man8/ipfwpcap.8.gz
OLD_FILES+=usr/share/man/man8/natd.8.gz
.endif
.if ${MK_JAIL} == no
OLD_FILES+=usr/sbin/jail
OLD_FILES+=usr/sbin/jexec
OLD_FILES+=usr/sbin/jls
OLD_FILES+=usr/share/man/man8/jail.8.gz
OLD_FILES+=usr/share/man/man8/jexec.8.gz
OLD_FILES+=usr/share/man/man8/jls.8.gz
.endif
.if ${MK_KERBEROS} == no
OLD_FILES+=usr/bin/compile_et
OLD_FILES+=usr/bin/hxtool
OLD_FILES+=usr/bin/kadmin
OLD_FILES+=usr/bin/kdestroy
OLD_FILES+=usr/bin/kf
OLD_FILES+=usr/bin/kgetcred
OLD_FILES+=usr/bin/kinit
OLD_FILES+=usr/bin/klist
OLD_FILES+=usr/bin/kpasswd
OLD_FILES+=usr/bin/krb5-config
OLD_FILES+=usr/bin/ksu
OLD_FILES+=usr/bin/kswitch
OLD_FILES+=usr/bin/string2key
OLD_FILES+=usr/bin/verify_krb5_conf
OLD_FILES+=usr/include/asn1-common.h
OLD_FILES+=usr/include/asn1_err.h
OLD_FILES+=usr/include/base64.h
OLD_FILES+=usr/include/cms_asn1.h
OLD_FILES+=usr/include/crmf_asn1.h
OLD_FILES+=usr/include/der-private.h
OLD_FILES+=usr/include/der-protos.h
OLD_FILES+=usr/include/der.h
OLD_FILES+=usr/include/digest_asn1.h
OLD_FILES+=usr/include/getarg.h
OLD_FILES+=usr/include/gssapi/gssapi_krb5.h
OLD_FILES+=usr/include/hdb-protos.h
OLD_FILES+=usr/include/hdb.h
OLD_FILES+=usr/include/hdb_asn1.h
OLD_FILES+=usr/include/hdb_err.h
OLD_FILES+=usr/include/heim_asn1.h
OLD_FILES+=usr/include/heim_err.h
OLD_FILES+=usr/include/heim_threads.h
OLD_FILES+=usr/include/heimbase.h
OLD_FILES+=usr/include/heimntlm-protos.h
OLD_FILES+=usr/include/heimntlm.h
OLD_FILES+=usr/include/hex.h
OLD_FILES+=usr/include/hx509-private.h
OLD_FILES+=usr/include/hx509-protos.h
OLD_FILES+=usr/include/hx509.h
OLD_FILES+=usr/include/hx509_err.h
OLD_FILES+=usr/include/k524_err.h
OLD_FILES+=usr/include/kadm5/admin.h
OLD_FILES+=usr/include/kadm5/kadm5-private.h
OLD_FILES+=usr/include/kadm5/kadm5-protos.h
OLD_FILES+=usr/include/kadm5/kadm5-pwcheck.h
OLD_FILES+=usr/include/kadm5/kadm5_err.h
OLD_FILES+=usr/include/kadm5/private.h
OLD_DIRS+=usr/include/kadm5
OLD_FILES+=usr/include/kafs.h
OLD_FILES+=usr/include/kdc-protos.h
OLD_FILES+=usr/include/kdc.h
OLD_FILES+=usr/include/krb5-private.h
OLD_FILES+=usr/include/krb5-protos.h
OLD_FILES+=usr/include/krb5-types.h
OLD_FILES+=usr/include/krb5.h
OLD_FILES+=usr/include/krb5/ccache_plugin.h
OLD_FILES+=usr/include/krb5/locate_plugin.h
OLD_FILES+=usr/include/krb5/send_to_kdc_plugin.h
OLD_FILES+=usr/include/krb5/windc_plugin.h
OLD_DIRS+=usr/include/krb5
OLD_FILES+=usr/include/krb5_asn1.h
OLD_FILES+=usr/include/krb5_ccapi.h
OLD_FILES+=usr/include/krb5_err.h
OLD_FILES+=usr/include/kx509_asn1.h
OLD_FILES+=usr/include/ntlm_err.h
OLD_FILES+=usr/include/ocsp_asn1.h
OLD_FILES+=usr/include/parse_bytes.h
OLD_FILES+=usr/include/parse_time.h
OLD_FILES+=usr/include/parse_units.h
OLD_FILES+=usr/include/pkcs10_asn1.h
OLD_FILES+=usr/include/pkcs12_asn1.h
OLD_FILES+=usr/include/pkcs8_asn1.h
OLD_FILES+=usr/include/pkcs9_asn1.h
OLD_FILES+=usr/include/pkinit_asn1.h
OLD_FILES+=usr/include/resolve.h
OLD_FILES+=usr/include/rfc2459_asn1.h
OLD_FILES+=usr/include/roken-common.h
OLD_FILES+=usr/include/rtbl.h
OLD_FILES+=usr/include/wind.h
OLD_FILES+=usr/include/wind_err.h
OLD_FILES+=usr/include/xdbm.h
OLD_FILES+=usr/lib/libasn1.a
OLD_FILES+=usr/lib/libasn1.so
OLD_LIBS+=usr/lib/libasn1.so.11
OLD_FILES+=usr/lib/libasn1_p.a
OLD_FILES+=usr/lib/libcom_err.a
OLD_FILES+=usr/lib/libcom_err.so
OLD_LIBS+=usr/lib/libcom_err.so.5
OLD_FILES+=usr/lib/libcom_err_p.a
OLD_FILES+=usr/lib/libgssapi_krb5.a
OLD_FILES+=usr/lib/libgssapi_krb5.so
OLD_LIBS+=usr/lib/libgssapi_krb5.so.10
OLD_FILES+=usr/lib/libgssapi_krb5_p.a
OLD_FILES+=usr/lib/libgssapi_ntlm.a
OLD_FILES+=usr/lib/libgssapi_ntlm.so
OLD_LIBS+=usr/lib/libgssapi_ntlm.so.10
OLD_FILES+=usr/lib/libgssapi_ntlm_p.a
OLD_FILES+=usr/lib/libgssapi_spnego.a
OLD_FILES+=usr/lib/libgssapi_spnego.so
OLD_LIBS+=usr/lib/libgssapi_spnego.so.10
OLD_FILES+=usr/lib/libgssapi_spnego_p.a
OLD_FILES+=usr/lib/libhdb.a
OLD_FILES+=usr/lib/libhdb.so
OLD_LIBS+=usr/lib/libhdb.so.11
OLD_FILES+=usr/lib/libhdb_p.a
OLD_FILES+=usr/lib/libheimbase.a
OLD_FILES+=usr/lib/libheimbase.so
OLD_LIBS+=usr/lib/libheimbase.so.11
OLD_FILES+=usr/lib/libheimbase_p.a
OLD_FILES+=usr/lib/libheimntlm.a
OLD_FILES+=usr/lib/libheimntlm.so
OLD_LIBS+=usr/lib/libheimntlm.so.11
OLD_FILES+=usr/lib/libheimntlm_p.a
OLD_FILES+=usr/lib/libheimsqlite.a
OLD_FILES+=usr/lib/libheimsqlite.so
OLD_LIBS+=usr/lib/libheimsqlite.so.11
OLD_FILES+=usr/lib/libheimsqlite_p.a
OLD_FILES+=usr/lib/libhx509.a
OLD_FILES+=usr/lib/libhx509.so
OLD_LIBS+=usr/lib/libhx509.so.11
OLD_FILES+=usr/lib/libhx509_p.a
OLD_FILES+=usr/lib/libkadm5clnt.a
OLD_FILES+=usr/lib/libkadm5clnt.so
OLD_LIBS+=usr/lib/libkadm5clnt.so.11
OLD_FILES+=usr/lib/libkadm5clnt_p.a
OLD_FILES+=usr/lib/libkadm5srv.a
OLD_FILES+=usr/lib/libkadm5srv.so
OLD_LIBS+=usr/lib/libkadm5srv.so.11
OLD_FILES+=usr/lib/libkadm5srv_p.a
OLD_FILES+=usr/lib/libkafs5.a
OLD_FILES+=usr/lib/libkafs5.so
OLD_LIBS+=usr/lib/libkafs5.so.11
OLD_FILES+=usr/lib/libkafs5_p.a
OLD_FILES+=usr/lib/libkdc.a
OLD_FILES+=usr/lib/libkdc.so
OLD_LIBS+=usr/lib/libkdc.so.11
OLD_FILES+=usr/lib/libkdc_p.a
OLD_FILES+=usr/lib/libkrb5.a
OLD_FILES+=usr/lib/libkrb5.so
OLD_LIBS+=usr/lib/libkrb5.so.11
OLD_FILES+=usr/lib/libkrb5_p.a
OLD_FILES+=usr/lib/libroken.a
OLD_FILES+=usr/lib/libroken.so
OLD_LIBS+=usr/lib/libroken.so.11
OLD_FILES+=usr/lib/libroken_p.a
OLD_FILES+=usr/lib/libwind.a
OLD_FILES+=usr/lib/libwind.so
OLD_LIBS+=usr/lib/libwind.so.11
OLD_FILES+=usr/lib/libwind_p.a
OLD_FILES+=usr/lib/pam_krb5.so
OLD_LIBS+=usr/lib/pam_krb5.so.5
OLD_FILES+=usr/lib/pam_ksu.so
OLD_LIBS+=usr/lib/pam_ksu.so.5
OLD_FILES+=usr/lib/private/libheimipcc.a
OLD_FILES+=usr/lib/private/libheimipcc.so
OLD_LIBS+=usr/lib/private/libheimipcc.so.11
OLD_FILES+=usr/lib/private/libheimipcc_p.a
OLD_FILES+=usr/lib/private/libheimipcs.a
OLD_FILES+=usr/lib/private/libheimipcs.so
OLD_LIBS+=usr/lib/private/libheimipcs.so.11
OLD_FILES+=usr/lib/private/libheimipcs_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libasn1.a
OLD_FILES+=usr/lib32/libasn1.so
OLD_LIBS+=usr/lib32/libasn1.so.11
OLD_FILES+=usr/lib32/libasn1_p.a
OLD_FILES+=usr/lib32/libgssapi_krb5.a
OLD_FILES+=usr/lib32/libgssapi_krb5.so
OLD_LIBS+=usr/lib32/libgssapi_krb5.so.10
OLD_FILES+=usr/lib32/libgssapi_krb5_p.a
OLD_FILES+=usr/lib32/libgssapi_ntlm.a
OLD_FILES+=usr/lib32/libgssapi_ntlm.so
OLD_LIBS+=usr/lib32/libgssapi_ntlm.so.10
OLD_FILES+=usr/lib32/libgssapi_ntlm_p.a
OLD_FILES+=usr/lib32/libgssapi_spnego.a
OLD_FILES+=usr/lib32/libgssapi_spnego.so
OLD_LIBS+=usr/lib32/libgssapi_spnego.so.10
OLD_FILES+=usr/lib32/libgssapi_spnego_p.a
OLD_FILES+=usr/lib32/libhdb.a
OLD_FILES+=usr/lib32/libhdb.so
OLD_LIBS+=usr/lib32/libhdb.so.11
OLD_FILES+=usr/lib32/libhdb_p.a
OLD_FILES+=usr/lib32/libheimbase.a
OLD_FILES+=usr/lib32/libheimbase.so
OLD_LIBS+=usr/lib32/libheimbase.so.11
OLD_FILES+=usr/lib32/libheimbase_p.a
OLD_FILES+=usr/lib32/libheimntlm.a
OLD_FILES+=usr/lib32/libheimntlm.so
OLD_LIBS+=usr/lib32/libheimntlm.so.11
OLD_FILES+=usr/lib32/libheimntlm_p.a
OLD_FILES+=usr/lib32/libheimsqlite.a
OLD_FILES+=usr/lib32/libheimsqlite.so
OLD_LIBS+=usr/lib32/libheimsqlite.so.11
OLD_FILES+=usr/lib32/libheimsqlite_p.a
OLD_FILES+=usr/lib32/libhx509.a
OLD_FILES+=usr/lib32/libhx509.so
OLD_LIBS+=usr/lib32/libhx509.so.11
OLD_FILES+=usr/lib32/libhx509_p.a
OLD_FILES+=usr/lib32/libkadm5clnt.a
OLD_FILES+=usr/lib32/libkadm5clnt.so
OLD_LIBS+=usr/lib32/libkadm5clnt.so.11
OLD_FILES+=usr/lib32/libkadm5clnt_p.a
OLD_FILES+=usr/lib32/libkadm5srv.a
OLD_FILES+=usr/lib32/libkadm5srv.so
OLD_LIBS+=usr/lib32/libkadm5srv.so.11
OLD_FILES+=usr/lib32/libkadm5srv_p.a
OLD_FILES+=usr/lib32/libkafs5.a
OLD_FILES+=usr/lib32/libkafs5.so
OLD_LIBS+=usr/lib32/libkafs5.so.11
OLD_FILES+=usr/lib32/libkafs5_p.a
OLD_FILES+=usr/lib32/libkdc.a
OLD_FILES+=usr/lib32/libkdc.so
OLD_LIBS+=usr/lib32/libkdc.so.11
OLD_FILES+=usr/lib32/libkdc_p.a
OLD_FILES+=usr/lib32/libkrb5.a
OLD_FILES+=usr/lib32/libkrb5.so
OLD_LIBS+=usr/lib32/libkrb5.so.11
OLD_FILES+=usr/lib32/libkrb5_p.a
OLD_FILES+=usr/lib32/libroken.a
OLD_FILES+=usr/lib32/libroken.so
OLD_LIBS+=usr/lib32/libroken.so.11
OLD_FILES+=usr/lib32/libroken_p.a
OLD_FILES+=usr/lib32/libwind.a
OLD_FILES+=usr/lib32/libwind.so
OLD_LIBS+=usr/lib32/libwind.so.11
OLD_FILES+=usr/lib32/libwind_p.a
OLD_FILES+=usr/lib32/pam_krb5.so
OLD_LIBS+=usr/lib32/pam_krb5.so.5
OLD_FILES+=usr/lib32/pam_ksu.so
OLD_LIBS+=usr/lib32/pam_ksu.so.5
OLD_FILES+=usr/lib32/private/libheimipcc.a
OLD_FILES+=usr/lib32/private/libheimipcc.so
OLD_LIBS+=usr/lib32/private/libheimipcc.so.11
OLD_FILES+=usr/lib32/private/libheimipcc_p.a
OLD_FILES+=usr/lib32/private/libheimipcs.a
OLD_FILES+=usr/lib32/private/libheimipcs.so
OLD_LIBS+=usr/lib32/private/libheimipcs.so.11
OLD_FILES+=usr/lib32/private/libheimipcs_p.a
.endif
OLD_FILES+=usr/libexec/digest-service
OLD_FILES+=usr/libexec/hprop
OLD_FILES+=usr/libexec/hpropd
OLD_FILES+=usr/libexec/ipropd-master
OLD_FILES+=usr/libexec/ipropd-slave
OLD_FILES+=usr/libexec/kadmind
OLD_FILES+=usr/libexec/kcm
OLD_FILES+=usr/libexec/kdc
OLD_FILES+=usr/libexec/kdigest
OLD_FILES+=usr/libexec/kfd
OLD_FILES+=usr/libexec/kimpersonate
OLD_FILES+=usr/libexec/kpasswdd
OLD_FILES+=usr/sbin/kstash
OLD_FILES+=usr/sbin/ktutil
OLD_FILES+=usr/sbin/iprop-log
OLD_FILES+=usr/share/info/heimdal.info.gz
OLD_FILES+=usr/share/man/man1/kdestroy.1.gz
OLD_FILES+=usr/share/man/man1/kf.1.gz
OLD_FILES+=usr/share/man/man1/kinit.1.gz
OLD_FILES+=usr/share/man/man1/klist.1.gz
OLD_FILES+=usr/share/man/man1/kpasswd.1.gz
OLD_FILES+=usr/share/man/man1/krb5-config.1.gz
OLD_FILES+=usr/share/man/man1/kswitch.1.gz
OLD_FILES+=usr/share/man/man3/HDB.3.gz
OLD_FILES+=usr/share/man/man3/hdb__del.3.gz
OLD_FILES+=usr/share/man/man3/hdb__get.3.gz
OLD_FILES+=usr/share/man/man3/hdb__put.3.gz
OLD_FILES+=usr/share/man/man3/hdb_auth_status.3.gz
OLD_FILES+=usr/share/man/man3/hdb_check_constrained_delegation.3.gz
OLD_FILES+=usr/share/man/man3/hdb_check_pkinit_ms_upn_match.3.gz
OLD_FILES+=usr/share/man/man3/hdb_check_s4u2self.3.gz
OLD_FILES+=usr/share/man/man3/hdb_close.3.gz
OLD_FILES+=usr/share/man/man3/hdb_destroy.3.gz
OLD_FILES+=usr/share/man/man3/hdb_entry_ex.3.gz
OLD_FILES+=usr/share/man/man3/hdb_fetch_kvno.3.gz
OLD_FILES+=usr/share/man/man3/hdb_firstkey.3.gz
OLD_FILES+=usr/share/man/man3/hdb_free.3.gz
OLD_FILES+=usr/share/man/man3/hdb_get_realms.3.gz
OLD_FILES+=usr/share/man/man3/hdb_lock.3.gz
OLD_FILES+=usr/share/man/man3/hdb_name.3.gz
OLD_FILES+=usr/share/man/man3/hdb_nextkey.3.gz
OLD_FILES+=usr/share/man/man3/hdb_open.3.gz
OLD_FILES+=usr/share/man/man3/hdb_password.3.gz
OLD_FILES+=usr/share/man/man3/hdb_remove.3.gz
OLD_FILES+=usr/share/man/man3/hdb_rename.3.gz
OLD_FILES+=usr/share/man/man3/hdb_store.3.gz
OLD_FILES+=usr/share/man/man3/hdb_unlock.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_build_ntlm1_master.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_build_ntlm2_master.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_calculate_lm2.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_calculate_ntlm1.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_calculate_ntlm2.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_decode_targetinfo.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_encode_targetinfo.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_encode_type1.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_encode_type2.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_encode_type3.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_free_buf.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_free_targetinfo.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_free_type1.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_free_type2.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_free_type3.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_keyex_unwrap.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_nt_key.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_ntlmv2_key.3.gz
OLD_FILES+=usr/share/man/man3/heim_ntlm_verify_ntlm2.3.gz
OLD_FILES+=usr/share/man/man3/hx509.3.gz
OLD_FILES+=usr/share/man/man3/hx509_bitstring_print.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_sign.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_sign_self.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_crl_dp_uri.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_eku.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_san_hostname.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_san_jid.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_san_ms_upn.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_san_otherName.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_san_pkinit.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_add_san_rfc822name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_init.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_ca.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_domaincontroller.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_notAfter.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_notAfter_lifetime.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_notBefore.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_proxy.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_serialnumber.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_spki.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_subject.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_template.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_set_unique.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_subject_expand.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ca_tbs_template_units.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_binary.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_check_eku.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_cmp.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_find_subjectAltName_otherName.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_SPKI.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_SPKI_AlgorithmIdentifier.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_attribute.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_base_subject.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_friendly_name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_issuer.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_issuer_unique_id.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_notAfter.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_notBefore.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_serialnumber.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_subject.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_get_subject_unique_id.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_init.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_init_data.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_keyusage_print.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_ref.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cert_set_friendly_name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_add.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_append.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_end_seq.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_filter.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_find.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_info.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_init.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_iter_f.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_merge.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_next_cert.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_start_seq.3.gz
OLD_FILES+=usr/share/man/man3/hx509_certs_store.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ci_print_names.3.gz
OLD_FILES+=usr/share/man/man3/hx509_clear_error_string.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms_create_signed_1.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms_envelope_1.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms_unenvelope.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms_unwrap_ContentInfo.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms_verify_signed.3.gz
OLD_FILES+=usr/share/man/man3/hx509_cms_wrap_ContentInfo.3.gz
OLD_FILES+=usr/share/man/man3/hx509_context_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_context_init.3.gz
OLD_FILES+=usr/share/man/man3/hx509_context_set_missing_revoke.3.gz
OLD_FILES+=usr/share/man/man3/hx509_crl_add_revoked_certs.3.gz
OLD_FILES+=usr/share/man/man3/hx509_crl_alloc.3.gz
OLD_FILES+=usr/share/man/man3/hx509_crl_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_crl_lifetime.3.gz
OLD_FILES+=usr/share/man/man3/hx509_crl_sign.3.gz
OLD_FILES+=usr/share/man/man3/hx509_crypto.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env_add.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env_add_binding.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env_find.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env_find_binding.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_env_lfind.3.gz
OLD_FILES+=usr/share/man/man3/hx509_err.3.gz
OLD_FILES+=usr/share/man/man3/hx509_error.3.gz
OLD_FILES+=usr/share/man/man3/hx509_free_error_string.3.gz
OLD_FILES+=usr/share/man/man3/hx509_free_octet_string_list.3.gz
OLD_FILES+=usr/share/man/man3/hx509_general_name_unparse.3.gz
OLD_FILES+=usr/share/man/man3/hx509_get_error_string.3.gz
OLD_FILES+=usr/share/man/man3/hx509_get_one_cert.3.gz
OLD_FILES+=usr/share/man/man3/hx509_keyset.3.gz
OLD_FILES+=usr/share/man/man3/hx509_lock.3.gz
OLD_FILES+=usr/share/man/man3/hx509_misc.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_binary.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_cmp.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_copy.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_expand.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_is_null_p.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_to_Name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_name_to_string.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ocsp_request.3.gz
OLD_FILES+=usr/share/man/man3/hx509_ocsp_verify.3.gz
OLD_FILES+=usr/share/man/man3/hx509_oid_print.3.gz
OLD_FILES+=usr/share/man/man3/hx509_oid_sprint.3.gz
OLD_FILES+=usr/share/man/man3/hx509_parse_name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_peer.3.gz
OLD_FILES+=usr/share/man/man3/hx509_peer_info_add_cms_alg.3.gz
OLD_FILES+=usr/share/man/man3/hx509_peer_info_alloc.3.gz
OLD_FILES+=usr/share/man/man3/hx509_peer_info_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_peer_info_set_cert.3.gz
OLD_FILES+=usr/share/man/man3/hx509_peer_info_set_cms_algs.3.gz
OLD_FILES+=usr/share/man/man3/hx509_print.3.gz
OLD_FILES+=usr/share/man/man3/hx509_print_cert.3.gz
OLD_FILES+=usr/share/man/man3/hx509_print_stdout.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_alloc.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_match_cmp_func.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_match_eku.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_match_friendly_name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_match_issuer_serial.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_match_option.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_statistic_file.3.gz
OLD_FILES+=usr/share/man/man3/hx509_query_unparse_stats.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke_add_crl.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke_add_ocsp.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke_init.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke_ocsp_print.3.gz
OLD_FILES+=usr/share/man/man3/hx509_revoke_verify.3.gz
OLD_FILES+=usr/share/man/man3/hx509_set_error_string.3.gz
OLD_FILES+=usr/share/man/man3/hx509_set_error_stringv.3.gz
OLD_FILES+=usr/share/man/man3/hx509_unparse_der_name.3.gz
OLD_FILES+=usr/share/man/man3/hx509_validate_cert.3.gz
OLD_FILES+=usr/share/man/man3/hx509_validate_ctx_add_flags.3.gz
OLD_FILES+=usr/share/man/man3/hx509_validate_ctx_free.3.gz
OLD_FILES+=usr/share/man/man3/hx509_validate_ctx_init.3.gz
OLD_FILES+=usr/share/man/man3/hx509_validate_ctx_set_print.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_attach_anchors.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_attach_revoke.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_ctx_f_allow_default_trustanchors.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_destroy_ctx.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_hostname.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_init_ctx.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_path.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_set_max_depth.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_set_proxy_certificate.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_set_strict_rfc3280_verification.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_set_time.3.gz
OLD_FILES+=usr/share/man/man3/hx509_verify_signature.3.gz
OLD_FILES+=usr/share/man/man3/hx509_xfree.3.gz
OLD_FILES+=usr/share/man/man3/k_afs_cell_of_file.3.gz
OLD_FILES+=usr/share/man/man3/k_hasafs.3.gz
OLD_FILES+=usr/share/man/man3/k_pioctl.3.gz
OLD_FILES+=usr/share/man/man3/k_setpag.3.gz
OLD_FILES+=usr/share/man/man3/k_unlog.3.gz
OLD_FILES+=usr/share/man/man3/kadm5_pwcheck.3.gz
OLD_FILES+=usr/share/man/man3/kafs.3.gz
OLD_FILES+=usr/share/man/man3/kafs5.3.gz
OLD_FILES+=usr/share/man/man3/kafs_set_verbose.3.gz
OLD_FILES+=usr/share/man/man3/kafs_settoken.3.gz
OLD_FILES+=usr/share/man/man3/kafs_settoken5.3.gz
OLD_FILES+=usr/share/man/man3/kafs_settoken_rxkad.3.gz
OLD_FILES+=usr/share/man/man3/krb5.3.gz
OLD_FILES+=usr/share/man/man3/krb524_convert_creds_kdc.3.gz
OLD_FILES+=usr/share/man/man3/krb524_convert_creds_kdc_ccache.3.gz
OLD_FILES+=usr/share/man/man3/krb5_425_conv_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_425_conv_principal_ext.3.gz
OLD_FILES+=usr/share/man/man3/krb5_524_conv_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_acc_ops.3.gz
OLD_FILES+=usr/share/man/man3/krb5_acl_match_file.3.gz
OLD_FILES+=usr/share/man/man3/krb5_acl_match_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_add_et_list.3.gz
OLD_FILES+=usr/share/man/man3/krb5_add_extra_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_add_ignore_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_addlog_dest.3.gz
OLD_FILES+=usr/share/man/man3/krb5_addlog_func.3.gz
OLD_FILES+=usr/share/man/man3/krb5_addr2sockaddr.3.gz
OLD_FILES+=usr/share/man/man3/krb5_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_address_compare.3.gz
OLD_FILES+=usr/share/man/man3/krb5_address_order.3.gz
OLD_FILES+=usr/share/man/man3/krb5_address_prefixlen_boundary.3.gz
OLD_FILES+=usr/share/man/man3/krb5_address_search.3.gz
OLD_FILES+=usr/share/man/man3/krb5_afslog.3.gz
OLD_FILES+=usr/share/man/man3/krb5_afslog_uid.3.gz
OLD_FILES+=usr/share/man/man3/krb5_allow_weak_crypto.3.gz
OLD_FILES+=usr/share/man/man3/krb5_aname_to_localname.3.gz
OLD_FILES+=usr/share/man/man3/krb5_anyaddr.3.gz
OLD_FILES+=usr/share/man/man3/krb5_appdefault.3.gz
OLD_FILES+=usr/share/man/man3/krb5_appdefault_boolean.3.gz
OLD_FILES+=usr/share/man/man3/krb5_appdefault_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_appdefault_time.3.gz
OLD_FILES+=usr/share/man/man3/krb5_append_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_genaddrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getaddrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getflags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getlocalsubkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getrcache.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getremotesubkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_getuserkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_initivector.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setaddrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setaddrs_from_fd.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setflags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setivector.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setlocalsubkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setrcache.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setremotesubkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_con_setuserkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_context.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_getauthenticator.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_getcksumtype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_getkeytype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_getlocalseqnumber.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_getremoteseqnumber.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_setcksumtype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_setkeytype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_setlocalseqnumber.3.gz
OLD_FILES+=usr/share/man/man3/krb5_auth_setremoteseqnumber.3.gz
OLD_FILES+=usr/share/man/man3/krb5_build_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_build_principal_ext.3.gz
OLD_FILES+=usr/share/man/man3/krb5_build_principal_va.3.gz
OLD_FILES+=usr/share/man/man3/krb5_build_principal_va_ext.3.gz
OLD_FILES+=usr/share/man/man3/krb5_c_enctype_compare.3.gz
OLD_FILES+=usr/share/man/man3/krb5_c_make_checksum.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_cache_end_seq_get.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_cache_get_first.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_cache_match.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_cache_next.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_clear_mcred.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_close.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_copy_cache.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_copy_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_copy_match_f.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_default_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_destroy.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_end_seq_get.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_gen_new.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_config.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_friendly_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_full_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_kdc_offset.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_lifetime.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_ops.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_prefix_ops.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_type.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_get_version.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_initialize.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_last_change_time.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_move.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_new_unique.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_next_cred.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_register.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_remove_cred.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_resolve.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_retrieve_cred.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_set_config.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_set_default_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_set_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_set_friendly_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_set_kdc_offset.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_start_seq_get.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_store_cred.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_support_switch.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cc_switch.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ccache.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ccache_intro.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cccol_cursor_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cccol_cursor_new.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cccol_cursor_next.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cccol_last_change_time.3.gz
OLD_FILES+=usr/share/man/man3/krb5_change_password.3.gz
OLD_FILES+=usr/share/man/man3/krb5_check_transited.3.gz
OLD_FILES+=usr/share/man/man3/krb5_checksum_is_collision_proof.3.gz
OLD_FILES+=usr/share/man/man3/krb5_checksum_is_keyed.3.gz
OLD_FILES+=usr/share/man/man3/krb5_checksumsize.3.gz
OLD_FILES+=usr/share/man/man3/krb5_cksumtype_to_enctype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_clear_error_message.3.gz
OLD_FILES+=usr/share/man/man3/krb5_clear_error_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_closelog.3.gz
OLD_FILES+=usr/share/man/man3/krb5_compare_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_file_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_free_strings.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_bool.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_bool_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_list.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_string_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_strings.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_time.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_get_time_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_parse_file_multi.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_parse_string_multi.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_bool.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_bool_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_list.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_string_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_strings.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_time.3.gz
OLD_FILES+=usr/share/man/man3/krb5_config_vget_time_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_context.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_creds_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_data.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_host_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_keyblock.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_keyblock_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_copy_ticket.3.gz
OLD_FILES+=usr/share/man/man3/krb5_create_checksum.3.gz
OLD_FILES+=usr/share/man/man3/krb5_create_checksum_iov.3.gz
OLD_FILES+=usr/share/man/man3/krb5_credential.3.gz
OLD_FILES+=usr/share/man/man3/krb5_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_creds_get_ticket_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_destroy.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_fx_cf2.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_getblocksize.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_getconfoundersize.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_getenctype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_getpadsize.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_crypto_iov.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_alloc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_cmp.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_copy.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_ct_cmp.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_realloc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_data_zero.3.gz
OLD_FILES+=usr/share/man/man3/krb5_decrypt.3.gz
OLD_FILES+=usr/share/man/man3/krb5_decrypt_EncryptedData.3.gz
OLD_FILES+=usr/share/man/man3/krb5_decrypt_iov_ivec.3.gz
OLD_FILES+=usr/share/man/man3/krb5_deprecated.3.gz
OLD_FILES+=usr/share/man/man3/krb5_digest.3.gz
OLD_FILES+=usr/share/man/man3/krb5_digest_probe.3.gz
OLD_FILES+=usr/share/man/man3/krb5_eai_to_heim_errno.3.gz
OLD_FILES+=usr/share/man/man3/krb5_encrypt.3.gz
OLD_FILES+=usr/share/man/man3/krb5_encrypt_EncryptedData.3.gz
OLD_FILES+=usr/share/man/man3/krb5_encrypt_iov_ivec.3.gz
OLD_FILES+=usr/share/man/man3/krb5_enctype_disable.3.gz
OLD_FILES+=usr/share/man/man3/krb5_enctype_enable.3.gz
OLD_FILES+=usr/share/man/man3/krb5_enctype_valid.3.gz
OLD_FILES+=usr/share/man/man3/krb5_enctypes_compatible_keys.3.gz
OLD_FILES+=usr/share/man/man3/krb5_error.3.gz
OLD_FILES+=usr/share/man/man3/krb5_expand_hostname.3.gz
OLD_FILES+=usr/share/man/man3/krb5_expand_hostname_realms.3.gz
OLD_FILES+=usr/share/man/man3/krb5_fcc_ops.3.gz
OLD_FILES+=usr/share/man/man3/krb5_fileformats.3.gz
OLD_FILES+=usr/share/man/man3/krb5_find_padata.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_config_files.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_context.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_cred_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_creds_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_data.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_data_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_error_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_host_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_keyblock.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_keyblock_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_krbhst.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_ticket.3.gz
OLD_FILES+=usr/share/man/man3/krb5_free_unparsed_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_fwd_tgt_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_generate_random_block.3.gz
OLD_FILES+=usr/share/man/man3/krb5_generate_subkey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_generate_subkey_extended.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_all_client_addrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_all_server_addrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_cred_from_kdc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_cred_from_kdc_opt.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_credentials.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_default_config_files.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_default_in_tkt_etypes.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_default_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_default_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_default_realms.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_dns_canonicalize_hostname.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_extra_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_fcache_version.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_forwarded_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_host_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_ignore_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_in_cred.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_in_tkt_with_keytab.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_in_tkt_with_password.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_in_tkt_with_skey.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_keyblock.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_keytab.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_opt_alloc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_opt_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_opt_get_error.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_opt_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_init_creds_password.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_kdc_sec_offset.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_krb524hst.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_krb_admin_hst.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_krb_changepw_hst.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_krbhst.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_max_time_skew.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_use_admin_kdc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_get_validated_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_getportbyname.3.gz
OLD_FILES+=usr/share/man/man3/krb5_h_addr2addr.3.gz
OLD_FILES+=usr/share/man/man3/krb5_h_addr2sockaddr.3.gz
OLD_FILES+=usr/share/man/man3/krb5_h_errno_to_heim_errno.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_context.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_get.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_get_error.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_intro.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_set_keytab.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_set_password.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_set_service.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_creds_step.3.gz
OLD_FILES+=usr/share/man/man3/krb5_init_ets.3.gz
OLD_FILES+=usr/share/man/man3/krb5_initlog.3.gz
OLD_FILES+=usr/share/man/man3/krb5_introduction.3.gz
OLD_FILES+=usr/share/man/man3/krb5_is_config_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_is_thread_safe.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kerberos_enctypes.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keyblock_get_enctype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keyblock_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keyblock_zero.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keytab.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keytab_intro.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keytab_key_proc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keytype_to_enctypes.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keytype_to_enctypes_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_keytype_to_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_format_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_get_addrinfo.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_next.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_next_as_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_krbhst_reset.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_add_entry.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_close.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_compare.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_copy_entry_contents.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_default_modify_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_default_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_destroy.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_end_seq_get.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_free_entry.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_get_entry.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_get_full_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_get_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_get_type.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_have_content.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_next_entry.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_read_service_key.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_register.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_remove_entry.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_resolve.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kt_start_seq_get.3.gz
OLD_FILES+=usr/share/man/man3/krb5_kuserok.3.gz
OLD_FILES+=usr/share/man/man3/krb5_log.3.gz
OLD_FILES+=usr/share/man/man3/krb5_log_msg.3.gz
OLD_FILES+=usr/share/man/man3/krb5_make_addrport.3.gz
OLD_FILES+=usr/share/man/man3/krb5_make_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_max_sockaddr_size.3.gz
OLD_FILES+=usr/share/man/man3/krb5_mcc_ops.3.gz
OLD_FILES+=usr/share/man/man3/krb5_mk_req.3.gz
OLD_FILES+=usr/share/man/man3/krb5_mk_safe.3.gz
OLD_FILES+=usr/share/man/man3/krb5_openlog.3.gz
OLD_FILES+=usr/share/man/man3/krb5_pac.3.gz
OLD_FILES+=usr/share/man/man3/krb5_pac_get_buffer.3.gz
OLD_FILES+=usr/share/man/man3/krb5_pac_verify.3.gz
OLD_FILES+=usr/share/man/man3/krb5_parse_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_parse_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_parse_name_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_parse_nametype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_password_key_proc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_plugin_register.3.gz
OLD_FILES+=usr/share/man/man3/krb5_prepend_config_files_default.3.gz
OLD_FILES+=usr/share/man/man3/krb5_princ_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_princ_set_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_compare.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_compare_any_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_get_comp_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_get_num_comp.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_get_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_get_type.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_intro.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_is_krbtgt.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_match.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_set_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_principal_set_type.3.gz
OLD_FILES+=usr/share/man/man3/krb5_print_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_random_to_key.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rcache.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_error.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_req_ctx.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_req_in_ctx_alloc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_req_in_set_keytab.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_req_in_set_pac_check.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_req_out_ctx_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_req_out_get_server.3.gz
OLD_FILES+=usr/share/man/man3/krb5_rd_safe.3.gz
OLD_FILES+=usr/share/man/man3/krb5_realm_compare.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_addrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_authdata.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_creds_tag.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_data.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_int16.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_int32.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_int8.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_keyblock.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_stringz.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_times.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_uint16.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_uint32.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ret_uint8.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_config_files.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_default_in_tkt_etypes.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_default_realm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_dns_canonicalize_hostname.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_error_message.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_error_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_extra_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_fcache_version.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_home_dir_access.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_ignore_addresses.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_kdc_sec_offset.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_max_time_skew.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_password.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_real_time.3.gz
OLD_FILES+=usr/share/man/man3/krb5_set_use_admin_kdc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_sname_to_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_sock_to_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_sockaddr2address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_sockaddr2port.3.gz
OLD_FILES+=usr/share/man/man3/krb5_sockaddr_uninteresting.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_clear_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_emem.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_free.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_from_data.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_from_fd.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_from_mem.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_from_readonly_mem.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_get_byteorder.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_get_eof_code.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_is_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_read.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_seek.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_set_byteorder.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_set_eof_code.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_set_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_set_max_alloc.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_to_data.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_truncate.3.gz
OLD_FILES+=usr/share/man/man3/krb5_storage_write.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_address.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_addrs.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_authdata.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_creds_tag.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_data.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_int16.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_int32.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_int8.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_keyblock.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_principal.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_stringz.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_times.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_uint16.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_uint32.3.gz
OLD_FILES+=usr/share/man/man3/krb5_store_uint8.3.gz
OLD_FILES+=usr/share/man/man3/krb5_string_to_key.3.gz
OLD_FILES+=usr/share/man/man3/krb5_string_to_keytype.3.gz
OLD_FILES+=usr/share/man/man3/krb5_support.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ticket.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ticket_get_authorization_data_type.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ticket_get_client.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ticket_get_endtime.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ticket_get_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_ticket_get_server.3.gz
OLD_FILES+=usr/share/man/man3/krb5_timeofday.3.gz
OLD_FILES+=usr/share/man/man3/krb5_unparse_name.3.gz
OLD_FILES+=usr/share/man/man3/krb5_unparse_name_fixed.3.gz
OLD_FILES+=usr/share/man/man3/krb5_unparse_name_fixed_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_unparse_name_fixed_short.3.gz
OLD_FILES+=usr/share/man/man3/krb5_unparse_name_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_unparse_name_short.3.gz
OLD_FILES+=usr/share/man/man3/krb5_us_timeofday.3.gz
OLD_FILES+=usr/share/man/man3/krb5_v4compat.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_checksum.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_checksum_iov.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_init_creds.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_opt_init.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_opt_set_flags.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_opt_set_keytab.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_opt_set_secure.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_opt_set_service.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_user.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_user_lrealm.3.gz
OLD_FILES+=usr/share/man/man3/krb5_verify_user_opt.3.gz
OLD_FILES+=usr/share/man/man3/krb5_vlog.3.gz
OLD_FILES+=usr/share/man/man3/krb5_vlog_msg.3.gz
OLD_FILES+=usr/share/man/man3/krb5_vset_error_string.3.gz
OLD_FILES+=usr/share/man/man3/krb5_vwarn.3.gz
OLD_FILES+=usr/share/man/man3/krb_afslog.3.gz
OLD_FILES+=usr/share/man/man3/krb_afslog_uid.3.gz
OLD_FILES+=usr/share/man/man3/ntlm_buf.3.gz
OLD_FILES+=usr/share/man/man3/ntlm_core.3.gz
OLD_FILES+=usr/share/man/man3/ntlm_type1.3.gz
OLD_FILES+=usr/share/man/man3/ntlm_type2.3.gz
OLD_FILES+=usr/share/man/man3/ntlm_type3.3.gz
OLD_FILES+=usr/share/man/man5/krb5.conf.5.gz
OLD_FILES+=usr/share/man/man8/hprop.8.gz
OLD_FILES+=usr/share/man/man8/hpropd.8.gz
OLD_FILES+=usr/share/man/man8/iprop-log.8.gz
OLD_FILES+=usr/share/man/man8/iprop.8.gz
OLD_FILES+=usr/share/man/man8/kadmin.8.gz
OLD_FILES+=usr/share/man/man8/kadmind.8.gz
OLD_FILES+=usr/share/man/man8/kcm.8.gz
OLD_FILES+=usr/share/man/man8/kdc.8.gz
OLD_FILES+=usr/share/man/man8/kdigest.8.gz
OLD_FILES+=usr/share/man/man8/kerberos.8.gz
OLD_FILES+=usr/share/man/man8/kimpersonate.8.gz
OLD_FILES+=usr/share/man/man8/kpasswdd.8.gz
OLD_FILES+=usr/share/man/man8/kstash.8.gz
OLD_FILES+=usr/share/man/man8/ktutil.8.gz
OLD_FILES+=usr/share/man/man8/pam_krb5.8.gz
OLD_FILES+=usr/share/man/man8/pam_ksu.8.gz
OLD_FILES+=usr/share/man/man8/string2key.8.gz
OLD_FILES+=usr/share/man/man8/verify_krb5_conf.8.gz
.endif
.if ${MK_LDNS} == no
OLD_FILES+=usr/lib/private/libldns.a
OLD_FILES+=usr/lib/private/libldns.so
OLD_LIBS+=usr/lib/private/libldns.so.5
OLD_FILES+=usr/lib/private/libldns_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/private/libldns.a
OLD_FILES+=usr/lib32/private/libldns.so
OLD_LIBS+=usr/lib32/private/libldns.so.5
OLD_FILES+=usr/lib32/private/libldns_p.a
.endif
.endif
.if ${MK_LDNS_UTILS} == no
OLD_FILES+=usr/bin/drill
OLD_FILES+=usr/share/man/man1/drill.1.gz
OLD_FILES+=usr/bin/host
OLD_FILES+=usr/share/man/man1/host.1.gz
.endif
#.if ${MK_LIB32} == no
# to be filled in
#.endif
.if ${MK_LIBCPLUSPLUS} == no
OLD_LIBS+=lib/libcxxrt.so.1
OLD_FILES+=usr/lib/libc++.a
OLD_FILES+=usr/lib/libc++_p.a
OLD_FILES+=usr/lib/libc++.so
OLD_LIBS+=usr/lib/libc++.so.1
OLD_FILES+=usr/lib/libcxxrt.a
OLD_FILES+=usr/lib/libcxxrt.so
OLD_FILES+=usr/lib/libcxxrt_p.a
OLD_FILES+=usr/include/c++/v1/__bit_reference
OLD_FILES+=usr/include/c++/v1/__config
OLD_FILES+=usr/include/c++/v1/__debug
OLD_FILES+=usr/include/c++/v1/__functional_03
OLD_FILES+=usr/include/c++/v1/__functional_base
OLD_FILES+=usr/include/c++/v1/__functional_base_03
OLD_FILES+=usr/include/c++/v1/__hash_table
OLD_FILES+=usr/include/c++/v1/__locale
OLD_FILES+=usr/include/c++/v1/__mutex_base
OLD_FILES+=usr/include/c++/v1/__split_buffer
OLD_FILES+=usr/include/c++/v1/__sso_allocator
OLD_FILES+=usr/include/c++/v1/__std_stream
OLD_FILES+=usr/include/c++/v1/__tree
OLD_FILES+=usr/include/c++/v1/__tuple
OLD_FILES+=usr/include/c++/v1/__tuple_03
OLD_FILES+=usr/include/c++/v1/__undef_min_max
OLD_FILES+=usr/include/c++/v1/algorithm
OLD_FILES+=usr/include/c++/v1/array
OLD_FILES+=usr/include/c++/v1/atomic
OLD_FILES+=usr/include/c++/v1/bitset
OLD_FILES+=usr/include/c++/v1/cassert
OLD_FILES+=usr/include/c++/v1/ccomplex
OLD_FILES+=usr/include/c++/v1/cctype
OLD_FILES+=usr/include/c++/v1/cerrno
OLD_FILES+=usr/include/c++/v1/cfenv
OLD_FILES+=usr/include/c++/v1/cfloat
OLD_FILES+=usr/include/c++/v1/chrono
OLD_FILES+=usr/include/c++/v1/cinttypes
OLD_FILES+=usr/include/c++/v1/ciso646
OLD_FILES+=usr/include/c++/v1/climits
OLD_FILES+=usr/include/c++/v1/clocale
OLD_FILES+=usr/include/c++/v1/cmath
OLD_FILES+=usr/include/c++/v1/codecvt
OLD_FILES+=usr/include/c++/v1/complex
OLD_FILES+=usr/include/c++/v1/complex.h
OLD_FILES+=usr/include/c++/v1/condition_variable
OLD_FILES+=usr/include/c++/v1/csetjmp
OLD_FILES+=usr/include/c++/v1/csignal
OLD_FILES+=usr/include/c++/v1/cstdarg
OLD_FILES+=usr/include/c++/v1/cstdbool
OLD_FILES+=usr/include/c++/v1/cstddef
OLD_FILES+=usr/include/c++/v1/cstdint
OLD_FILES+=usr/include/c++/v1/cstdio
OLD_FILES+=usr/include/c++/v1/cstdlib
OLD_FILES+=usr/include/c++/v1/cstring
OLD_FILES+=usr/include/c++/v1/ctgmath
OLD_FILES+=usr/include/c++/v1/ctime
OLD_FILES+=usr/include/c++/v1/cwchar
OLD_FILES+=usr/include/c++/v1/cwctype
OLD_FILES+=usr/include/c++/v1/cxxabi.h
OLD_FILES+=usr/include/c++/v1/deque
OLD_FILES+=usr/include/c++/v1/exception
OLD_FILES+=usr/include/c++/v1/experimental/dynarray
OLD_FILES+=usr/include/c++/v1/experimental/optional
OLD_FILES+=usr/include/c++/v1/ext/__hash
OLD_FILES+=usr/include/c++/v1/ext/hash_map
OLD_FILES+=usr/include/c++/v1/ext/hash_set
OLD_FILES+=usr/include/c++/v1/forward_list
OLD_FILES+=usr/include/c++/v1/fstream
OLD_FILES+=usr/include/c++/v1/functional
OLD_FILES+=usr/include/c++/v1/future
OLD_FILES+=usr/include/c++/v1/initializer_list
OLD_FILES+=usr/include/c++/v1/iomanip
OLD_FILES+=usr/include/c++/v1/ios
OLD_FILES+=usr/include/c++/v1/iosfwd
OLD_FILES+=usr/include/c++/v1/iostream
OLD_FILES+=usr/include/c++/v1/istream
OLD_FILES+=usr/include/c++/v1/iterator
OLD_FILES+=usr/include/c++/v1/limits
OLD_FILES+=usr/include/c++/v1/list
OLD_FILES+=usr/include/c++/v1/locale
OLD_FILES+=usr/include/c++/v1/map
OLD_FILES+=usr/include/c++/v1/memory
OLD_FILES+=usr/include/c++/v1/mutex
OLD_FILES+=usr/include/c++/v1/new
OLD_FILES+=usr/include/c++/v1/numeric
OLD_FILES+=usr/include/c++/v1/ostream
OLD_FILES+=usr/include/c++/v1/queue
OLD_FILES+=usr/include/c++/v1/random
OLD_FILES+=usr/include/c++/v1/ratio
OLD_FILES+=usr/include/c++/v1/regex
OLD_FILES+=usr/include/c++/v1/scoped_allocator
OLD_FILES+=usr/include/c++/v1/set
OLD_FILES+=usr/include/c++/v1/sstream
OLD_FILES+=usr/include/c++/v1/stack
OLD_FILES+=usr/include/c++/v1/stdexcept
OLD_FILES+=usr/include/c++/v1/streambuf
OLD_FILES+=usr/include/c++/v1/string
OLD_FILES+=usr/include/c++/v1/strstream
OLD_FILES+=usr/include/c++/v1/system_error
OLD_FILES+=usr/include/c++/v1/tgmath.h
OLD_FILES+=usr/include/c++/v1/thread
OLD_FILES+=usr/include/c++/v1/tuple
OLD_FILES+=usr/include/c++/v1/type_traits
OLD_FILES+=usr/include/c++/v1/typeindex
OLD_FILES+=usr/include/c++/v1/typeinfo
OLD_FILES+=usr/include/c++/v1/unordered_map
OLD_FILES+=usr/include/c++/v1/unordered_set
OLD_FILES+=usr/include/c++/v1/unwind-arm.h
OLD_FILES+=usr/include/c++/v1/unwind-itanium.h
OLD_FILES+=usr/include/c++/v1/unwind.h
OLD_FILES+=usr/include/c++/v1/utility
OLD_FILES+=usr/include/c++/v1/valarray
OLD_FILES+=usr/include/c++/v1/vector
OLD_DIRS+=usr/include/c++/v1/experimental
OLD_DIRS+=usr/include/c++/v1/ext
OLD_DIRS+=usr/include/c++/v1
.endif
#.if ${MK_LIBTHR} == no
# to be filled in
#.endif
#.if ${MK_LOCALES} == no
# to be filled in
#.endif
.if ${MK_LOCATE} == no
OLD_FILES+=etc/locate.rc
OLD_FILES+=etc/periodic/weekly/310.locate
OLD_FILES+=usr/bin/locate
OLD_FILES+=usr/libexec/locate.bigram
OLD_FILES+=usr/libexec/locate.code
OLD_FILES+=usr/libexec/locate.concatdb
OLD_FILES+=usr/libexec/locate.mklocatedb
OLD_FILES+=usr/libexec/locate.updatedb
OLD_FILES+=usr/share/man/man1/locate.1.gz
OLD_FILES+=usr/share/man/man8/locate.updatedb.8.gz
OLD_FILES+=usr/share/man/man8/updatedb.8.gz
.endif
.if ${MK_LPR} == no
OLD_FILES+=etc/hosts.lpd
OLD_FILES+=etc/printcap
OLD_FILES+=usr/bin/lp
OLD_FILES+=usr/bin/lpq
OLD_FILES+=usr/bin/lpr
OLD_FILES+=usr/bin/lprm
OLD_FILES+=usr/libexec/lpr/ru/bjc-240.sh.sample
OLD_FILES+=usr/libexec/lpr/ru/koi2alt
OLD_FILES+=usr/libexec/lpr/ru/koi2855
OLD_DIRS+=usr/libexec/lpr/ru
OLD_FILES+=usr/libexec/lpr/lpf
OLD_DIRS+=usr/libexec/lpr
OLD_FILES+=usr/sbin/chkprintcap
OLD_FILES+=usr/sbin/lpc
OLD_FILES+=usr/sbin/lpd
OLD_FILES+=usr/sbin/lptest
OLD_FILES+=usr/sbin/pac
OLD_FILES+=usr/share/doc/smm/07.lpd/paper.ascii.gz
OLD_DIRS+=usr/share/doc/smm/07.lpd
OLD_FILES+=usr/share/examples/etc/hosts.lpd
OLD_FILES+=usr/share/examples/etc/printcap
OLD_FILES+=usr/share/man/man1/lp.1.gz
OLD_FILES+=usr/share/man/man1/lpq.1.gz
OLD_FILES+=usr/share/man/man1/lpr.1.gz
OLD_FILES+=usr/share/man/man1/lprm.1.gz
OLD_FILES+=usr/share/man/man1/lptest.1.gz
OLD_FILES+=usr/share/man/man5/printcap.5.gz
OLD_FILES+=usr/share/man/man8/chkprintcap.8.gz
OLD_FILES+=usr/share/man/man8/lpc.8.gz
OLD_FILES+=usr/share/man/man8/lpd.8.gz
OLD_FILES+=usr/share/man/man8/pac.8.gz
.endif
.if ${MK_MAIL} == no
OLD_FILES+=etc/periodic/daily/130.clean-msgs
OLD_FILES+=usr/bin/Mail
OLD_FILES+=usr/bin/biff
OLD_FILES+=usr/bin/from
OLD_FILES+=usr/bin/mail
OLD_FILES+=usr/bin/mailx
OLD_FILES+=usr/bin/msgs
OLD_FILES+=usr/libexec/comsat
OLD_FILES+=usr/share/examples/etc/mail.rc
OLD_FILES+=usr/share/man/man1/Mail.1.gz
OLD_FILES+=usr/share/man/man1/biff.1.gz
OLD_FILES+=usr/share/man/man1/from.1.gz
OLD_FILES+=usr/share/man/man1/mail.1.gz
OLD_FILES+=usr/share/man/man1/mailx.1.gz
OLD_FILES+=usr/share/man/man1/msgs.1.gz
OLD_FILES+=usr/share/man/man8/comsat.8.gz
OLD_FILES+=usr/share/misc/mail.help
OLD_FILES+=usr/share/misc/mail.tildehelp
.endif
.if ${MK_MAILWRAPPER} == no
OLD_FILES+=etc/mail/mailer.conf
.if ${MK_SENDMAIL} == no
OLD_FILES+=usr/sbin/mailwrapper
.endif
OLD_FILES+=usr/share/man/man8/mailwrapper.8.gz
.endif
#.if ${MK_MAN} == no
# This should add a dependency to a special target which removes all man pages.
# Listing all of them here is overkill.
#.endif
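# A minimal sketch of what such a special target could look like if it were
# wired into the delete-old machinery; the target name "delete-old-manpages"
# and the find(1) invocation below are illustrative assumptions only, not
# existing build infrastructure:
# delete-old-manpages: .PHONY
# 	find ${DESTDIR}/usr/share/man -type f -name '*.gz' -delete
# This would avoid maintaining a per-page OLD_FILES list here entirely.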
.if ${MK_NDIS} == no
OLD_FILES+=usr/sbin/ndiscvt
OLD_FILES+=usr/sbin/ndisgen
OLD_FILES+=usr/share/man/man8/ndiscvt.8.gz
OLD_FILES+=usr/share/man/man8/ndisgen.8.gz
OLD_FILES+=usr/share/misc/windrv_stub.c
.endif
.if ${MK_NETCAT} == no
OLD_FILES+=usr/bin/nc
OLD_FILES+=usr/share/man/man1/nc.1.gz
.endif
.if ${MK_NIS} == no
OLD_FILES+=usr/bin/ypcat
OLD_FILES+=usr/bin/ypchfn
OLD_FILES+=usr/bin/ypchpass
OLD_FILES+=usr/bin/ypchsh
OLD_FILES+=usr/bin/ypmatch
OLD_FILES+=usr/bin/yppasswd
OLD_FILES+=usr/bin/ypwhich
OLD_FILES+=usr/include/ypclnt.h
OLD_FILES+=usr/lib/libypclnt.a
OLD_FILES+=usr/lib/libypclnt.so
OLD_LIBS+=usr/lib/libypclnt.so.4
OLD_FILES+=usr/lib/libypclnt_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libypclnt.a
OLD_FILES+=usr/lib32/libypclnt.so
OLD_LIBS+=usr/lib32/libypclnt.so.4
OLD_FILES+=usr/lib32/libypclnt_p.a
.endif
OLD_FILES+=usr/libexec/mknetid
OLD_FILES+=usr/libexec/yppwupdate
OLD_FILES+=usr/libexec/ypxfr
OLD_FILES+=usr/sbin/rpc.yppasswdd
OLD_FILES+=usr/sbin/rpc.ypupdated
OLD_FILES+=usr/sbin/rpc.ypxfrd
OLD_FILES+=usr/sbin/yp_mkdb
OLD_FILES+=usr/sbin/ypbind
OLD_FILES+=usr/sbin/ypinit
OLD_FILES+=usr/sbin/yppoll
OLD_FILES+=usr/sbin/yppush
OLD_FILES+=usr/sbin/ypserv
OLD_FILES+=usr/sbin/ypset
OLD_FILES+=usr/share/man/man1/ypcat.1.gz
OLD_FILES+=usr/share/man/man1/ypchfn.1.gz
OLD_FILES+=usr/share/man/man1/ypchpass.1.gz
OLD_FILES+=usr/share/man/man1/ypchsh.1.gz
OLD_FILES+=usr/share/man/man1/ypmatch.1.gz
OLD_FILES+=usr/share/man/man1/yppasswd.1.gz
OLD_FILES+=usr/share/man/man1/ypwhich.1.gz
OLD_FILES+=usr/share/man/man5/netid.5.gz
OLD_FILES+=usr/share/man/man8/mknetid.8.gz
OLD_FILES+=usr/share/man/man8/rpc.yppasswdd.8.gz
OLD_FILES+=usr/share/man/man8/rpc.ypxfrd.8.gz
OLD_FILES+=usr/share/man/man8/yp_mkdb.8.gz
OLD_FILES+=usr/share/man/man8/ypbind.8.gz
OLD_FILES+=usr/share/man/man8/ypinit.8.gz
OLD_FILES+=usr/share/man/man8/yppoll.8.gz
OLD_FILES+=usr/share/man/man8/yppush.8.gz
OLD_FILES+=usr/share/man/man8/ypserv.8.gz
OLD_FILES+=usr/share/man/man8/ypset.8.gz
OLD_FILES+=usr/share/man/man8/ypxfr.8.gz
OLD_FILES+=var/yp/Makefile
OLD_FILES+=var/yp/Makefile.dist
.endif
#.if ${MK_NLS} == no
# to be filled in
#.endif
.if ${MK_NTP} == no
OLD_FILES+=etc/ntp.conf
OLD_FILES+=etc/periodic/daily/480.status-ntpd
OLD_FILES+=usr/bin/ntpq
OLD_FILES+=usr/sbin/ntp-keygen
OLD_FILES+=usr/sbin/ntpd
OLD_FILES+=usr/sbin/ntpdate
OLD_FILES+=usr/sbin/ntpdc
OLD_FILES+=usr/sbin/ntptime
OLD_FILES+=usr/sbin/sntp
OLD_FILES+=usr/share/doc/ntp/accopt.html
OLD_FILES+=usr/share/doc/ntp/assoc.html
OLD_FILES+=usr/share/doc/ntp/audio.html
OLD_FILES+=usr/share/doc/ntp/authopt.html
OLD_FILES+=usr/share/doc/ntp/build.html
OLD_FILES+=usr/share/doc/ntp/clockopt.html
OLD_FILES+=usr/share/doc/ntp/config.html
OLD_FILES+=usr/share/doc/ntp/confopt.html
OLD_FILES+=usr/share/doc/ntp/copyright.html
OLD_FILES+=usr/share/doc/ntp/debug.html
OLD_FILES+=usr/share/doc/ntp/driver1.html
OLD_FILES+=usr/share/doc/ntp/driver10.html
OLD_FILES+=usr/share/doc/ntp/driver11.html
OLD_FILES+=usr/share/doc/ntp/driver12.html
OLD_FILES+=usr/share/doc/ntp/driver16.html
OLD_FILES+=usr/share/doc/ntp/driver18.html
OLD_FILES+=usr/share/doc/ntp/driver19.html
OLD_FILES+=usr/share/doc/ntp/driver2.html
OLD_FILES+=usr/share/doc/ntp/driver20.html
OLD_FILES+=usr/share/doc/ntp/driver22.html
OLD_FILES+=usr/share/doc/ntp/driver26.html
OLD_FILES+=usr/share/doc/ntp/driver27.html
OLD_FILES+=usr/share/doc/ntp/driver28.html
OLD_FILES+=usr/share/doc/ntp/driver29.html
OLD_FILES+=usr/share/doc/ntp/driver3.html
OLD_FILES+=usr/share/doc/ntp/driver30.html
OLD_FILES+=usr/share/doc/ntp/driver32.html
OLD_FILES+=usr/share/doc/ntp/driver33.html
OLD_FILES+=usr/share/doc/ntp/driver34.html
OLD_FILES+=usr/share/doc/ntp/driver35.html
OLD_FILES+=usr/share/doc/ntp/driver36.html
OLD_FILES+=usr/share/doc/ntp/driver37.html
OLD_FILES+=usr/share/doc/ntp/driver4.html
OLD_FILES+=usr/share/doc/ntp/driver5.html
OLD_FILES+=usr/share/doc/ntp/driver6.html
OLD_FILES+=usr/share/doc/ntp/driver7.html
OLD_FILES+=usr/share/doc/ntp/driver8.html
OLD_FILES+=usr/share/doc/ntp/driver9.html
OLD_FILES+=usr/share/doc/ntp/extern.html
OLD_FILES+=usr/share/doc/ntp/hints.html
OLD_FILES+=usr/share/doc/ntp/howto.html
OLD_FILES+=usr/share/doc/ntp/index.html
OLD_FILES+=usr/share/doc/ntp/kern.html
OLD_FILES+=usr/share/doc/ntp/ldisc.html
OLD_FILES+=usr/share/doc/ntp/measure.html
OLD_FILES+=usr/share/doc/ntp/miscopt.html
OLD_FILES+=usr/share/doc/ntp/monopt.html
OLD_FILES+=usr/share/doc/ntp/mx4200data.html
OLD_FILES+=usr/share/doc/ntp/notes.html
OLD_FILES+=usr/share/doc/ntp/ntpd.html
OLD_FILES+=usr/share/doc/ntp/ntpdate.html
OLD_FILES+=usr/share/doc/ntp/ntpdc.html
OLD_FILES+=usr/share/doc/ntp/ntpq.html
OLD_FILES+=usr/share/doc/ntp/ntptime.html
OLD_FILES+=usr/share/doc/ntp/ntptrace.html
OLD_FILES+=usr/share/doc/ntp/parsedata.html
OLD_FILES+=usr/share/doc/ntp/parsenew.html
OLD_FILES+=usr/share/doc/ntp/patches.html
OLD_FILES+=usr/share/doc/ntp/porting.html
OLD_FILES+=usr/share/doc/ntp/pps.html
OLD_FILES+=usr/share/doc/ntp/prefer.html
OLD_FILES+=usr/share/doc/ntp/quick.html
OLD_FILES+=usr/share/doc/ntp/rdebug.html
OLD_FILES+=usr/share/doc/ntp/refclock.html
OLD_FILES+=usr/share/doc/ntp/release.html
OLD_FILES+=usr/share/doc/ntp/tickadj.html
OLD_DIRS+=usr/share/doc/ntp
OLD_FILES+=usr/share/examples/etc/ntp.conf
OLD_FILES+=usr/share/man/man5/ntp.conf.5.gz
OLD_FILES+=usr/share/man/man5/ntp.keys.5.gz
OLD_FILES+=usr/share/man/man8/ntp-keygen.8.gz
OLD_FILES+=usr/share/man/man8/ntpd.8.gz
OLD_FILES+=usr/share/man/man8/ntpdate.8.gz
OLD_FILES+=usr/share/man/man8/ntpdc.8.gz
OLD_FILES+=usr/share/man/man8/ntpq.8.gz
OLD_FILES+=usr/share/man/man8/ntptime.8.gz
.endif
#.if ${MK_OBJC} == no
# to be filled in
#.endif
.if ${MK_OPENSSH} == no
OLD_FILES+=usr/bin/sftp
OLD_FILES+=usr/bin/ssh
OLD_FILES+=usr/bin/ssh-add
OLD_FILES+=usr/bin/ssh-agent
OLD_FILES+=usr/bin/ssh-copy-id
OLD_FILES+=usr/bin/ssh-keygen
OLD_FILES+=usr/bin/ssh-keyscan
OLD_FILES+=usr/lib/private/libssh.a
OLD_FILES+=usr/lib/private/libssh.so
OLD_LIBS+=usr/lib/private/libssh.so.5
OLD_FILES+=usr/lib/private/libssh_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/private/libssh.a
OLD_FILES+=usr/lib32/private/libssh.so
OLD_LIBS+=usr/lib32/private/libssh.so.5
OLD_FILES+=usr/lib32/private/libssh_p.a
.endif
OLD_FILES+=usr/libexec/sftp-server
OLD_FILES+=usr/libexec/ssh-keysign
OLD_FILES+=usr/libexec/ssh-pkcs11-helper
OLD_FILES+=usr/sbin/sshd
.endif
#.if ${MK_OPENSSL} == no
# to be filled in
#.endif
.if ${MK_PC_SYSINSTALL} == no
# backend-partmanager
OLD_FILES+=usr/share/pc-sysinstall/backend-partmanager/create-part.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-partmanager/delete-part.sh
# backend-query
OLD_FILES+=usr/share/pc-sysinstall/backend-query/detect-emulation.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/detect-laptop.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/detect-nics.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/disk-info.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/disk-list.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/disk-part.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/enable-net.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/get-packages.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/list-components.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/list-config.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/list-mirrors.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/list-packages.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/list-rsync-backups.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/list-tzones.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/query-langs.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/send-logs.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/setup-ssh-keys.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/set-mirror.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/sys-mem.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/test-live.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/test-netup.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/update-part-list.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/xkeyboard-layouts.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/xkeyboard-models.sh
OLD_FILES+=usr/share/pc-sysinstall/backend-query/xkeyboard-variants.sh
# backend
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-bsdlabel.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-cleanup.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-disk.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-extractimage.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-ftp.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-installcomponents.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-installpackages.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-localize.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-mountdisk.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-mountoptical.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-networking.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-newfs.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-parse.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-packages.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-runcommands.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-unmount.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-upgrade.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions-users.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/functions.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/installimage.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/parseconfig.sh
OLD_FILES+=usr/share/pc-sysinstall/backend/startautoinstall.sh
# conf
OLD_FILES+=usr/share/pc-sysinstall/conf/avail-langs
OLD_FILES+=usr/share/pc-sysinstall/conf/exclude-from-upgrade
OLD_FILES+=usr/share/pc-sysinstall/conf/license/bsd-en.txt
OLD_FILES+=usr/share/pc-sysinstall/conf/license/intel-en.txt
OLD_FILES+=usr/share/pc-sysinstall/conf/license/nvidia-en.txt
OLD_FILES+=usr/share/pc-sysinstall/conf/pc-sysinstall.conf
# doc
OLD_FILES+=usr/share/pc-sysinstall/doc/help-disk-list
OLD_FILES+=usr/share/pc-sysinstall/doc/help-disk-size
OLD_FILES+=usr/share/pc-sysinstall/doc/help-index
OLD_FILES+=usr/share/pc-sysinstall/doc/help-start-autoinstall
# examples
OLD_FILES+=usr/share/examples/pc-sysinstall/README
OLD_FILES+=usr/share/examples/pc-sysinstall/pc-autoinstall.conf
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.fbsd-netinstall
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.geli
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.gmirror
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.netinstall
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.restore
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.rsync
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.upgrade
OLD_FILES+=usr/share/examples/pc-sysinstall/pcinstall.cfg.zfs
# pc-sysinstall
OLD_FILES+=usr/sbin/pc-sysinstall
OLD_FILES+=usr/share/man/man8/pc-sysinstall.8.gz
OLD_DIRS+=usr/share/pc-sysinstall/backend
OLD_DIRS+=usr/share/pc-sysinstall/backend-partmanager
OLD_DIRS+=usr/share/pc-sysinstall/backend-query
OLD_DIRS+=usr/share/pc-sysinstall/conf/license
OLD_DIRS+=usr/share/pc-sysinstall/conf
OLD_DIRS+=usr/share/pc-sysinstall/doc
OLD_DIRS+=usr/share/pc-sysinstall
OLD_DIRS+=usr/share/examples/pc-sysinstall
.endif
.if ${MK_PF} == no
OLD_FILES+=etc/periodic/security/520.pfdenied
OLD_FILES+=etc/pf.os
OLD_FILES+=sbin/pfctl
OLD_FILES+=sbin/pflogd
OLD_FILES+=usr/libexec/tftp-proxy
OLD_FILES+=usr/sbin/ftp-proxy
OLD_FILES+=usr/share/examples/etc/pf.os
OLD_FILES+=usr/share/examples/pf/ackpri
OLD_FILES+=usr/share/examples/pf/faq-example1
OLD_FILES+=usr/share/examples/pf/faq-example2
OLD_FILES+=usr/share/examples/pf/faq-example3
OLD_FILES+=usr/share/examples/pf/pf.conf
OLD_FILES+=usr/share/examples/pf/queue1
OLD_FILES+=usr/share/examples/pf/queue2
OLD_FILES+=usr/share/examples/pf/queue3
OLD_FILES+=usr/share/examples/pf/queue4
OLD_FILES+=usr/share/examples/pf/spamd
OLD_DIRS+=usr/share/examples/pf
OLD_FILES+=usr/share/man/man4/pf.4.gz
OLD_FILES+=usr/share/man/man4/pflog.4.gz
OLD_FILES+=usr/share/man/man4/pfsync.4.gz
OLD_FILES+=usr/share/man/man5/pf.conf.5.gz
OLD_FILES+=usr/share/man/man5/pf.os.5.gz
OLD_FILES+=usr/share/man/man8/ftp-proxy.8.gz
OLD_FILES+=usr/share/man/man8/pfctl.8.gz
OLD_FILES+=usr/share/man/man8/pflogd.8.gz
OLD_FILES+=usr/share/man/man8/tftp-proxy.8.gz
.endif
.if ${MK_PKGBOOTSTRAP} == no
OLD_FILES+=usr/sbin/pkg
OLD_FILES+=usr/share/man/man7/pkg.7.gz
.endif
.if ${MK_PORTSNAP} == no
OLD_FILES+=etc/portsnap.conf
OLD_FILES+=usr/libexec/make_index
OLD_FILES+=usr/libexec/phttpget
OLD_FILES+=usr/sbin/portsnap
OLD_FILES+=usr/share/examples/etc/portsnap.conf
OLD_FILES+=usr/share/man/man8/portsnap.8.gz
.endif
.if ${MK_PPP} == no
OLD_FILES+=etc/ppp/ppp.conf
OLD_DIRS+=etc/ppp
OLD_FILES+=usr/sbin/ppp
OLD_FILES+=usr/sbin/pppctl
OLD_FILES+=usr/share/man/man8/ppp.8.gz
OLD_FILES+=usr/share/man/man8/pppctl.8.gz
.endif
.if ${MK_PROFILE} == no
OLD_FILES+=usr/lib/libalias_cuseeme_p.a
OLD_FILES+=usr/lib/libalias_dummy_p.a
OLD_FILES+=usr/lib/libalias_ftp_p.a
OLD_FILES+=usr/lib/libalias_irc_p.a
OLD_FILES+=usr/lib/libalias_nbt_p.a
OLD_FILES+=usr/lib/libalias_p.a
OLD_FILES+=usr/lib/libalias_pptp_p.a
OLD_FILES+=usr/lib/libalias_skinny_p.a
OLD_FILES+=usr/lib/libalias_smedia_p.a
OLD_FILES+=usr/lib/libarchive_p.a
OLD_FILES+=usr/lib/libasn1_p.a
OLD_FILES+=usr/lib/libbegemot_p.a
OLD_FILES+=usr/lib/libbluetooth_p.a
OLD_FILES+=usr/lib/libbsdxml_p.a
OLD_FILES+=usr/lib/libbsm_p.a
OLD_FILES+=usr/lib/libbsnmp_p.a
OLD_FILES+=usr/lib/libbz2_p.a
OLD_FILES+=usr/lib/libc_p.a
OLD_FILES+=usr/lib/libcalendar_p.a
OLD_FILES+=usr/lib/libcam_p.a
OLD_FILES+=usr/lib/libcom_err_p.a
OLD_FILES+=usr/lib/libcompat_p.a
OLD_FILES+=usr/lib/libcrypt_p.a
OLD_FILES+=usr/lib/libcrypto_p.a
OLD_FILES+=usr/lib/libcurses_p.a
OLD_FILES+=usr/lib/libcursesw_p.a
OLD_FILES+=usr/lib/libdevinfo_p.a
OLD_FILES+=usr/lib/libdevstat_p.a
OLD_FILES+=usr/lib/libdialog_p.a
OLD_FILES+=usr/lib/libedit_p.a
OLD_FILES+=usr/lib/libelf_p.a
OLD_FILES+=usr/lib/libfetch_p.a
OLD_FILES+=usr/lib/libfl_p.a
OLD_FILES+=usr/lib/libform_p.a
OLD_FILES+=usr/lib/libformw_p.a
OLD_FILES+=usr/lib/libgcc_p.a
OLD_FILES+=usr/lib/libgeom_p.a
OLD_FILES+=usr/lib/libgnuregex_p.a
OLD_FILES+=usr/lib/libgpib_p.a
OLD_FILES+=usr/lib/libgssapi_krb5_p.a
OLD_FILES+=usr/lib/libgssapi_p.a
OLD_FILES+=usr/lib/libhdb_p.a
OLD_FILES+=usr/lib/libheimbase_p.a
OLD_FILES+=usr/lib/libheimsqlite_p.a
OLD_FILES+=usr/lib/libhistory_p.a
OLD_FILES+=usr/lib/libipsec_p.a
OLD_FILES+=usr/lib/libjail_p.a
OLD_FILES+=usr/lib/libkadm5clnt_p.a
OLD_FILES+=usr/lib/libkadm5srv_p.a
OLD_FILES+=usr/lib/libkafs5_p.a
OLD_FILES+=usr/lib/libkdc_p.a
OLD_FILES+=usr/lib/libkiconv_p.a
OLD_FILES+=usr/lib/libkrb5_p.a
OLD_FILES+=usr/lib/libkvm_p.a
OLD_FILES+=usr/lib/libl_p.a
OLD_FILES+=usr/lib/libln_p.a
OLD_FILES+=usr/lib/libm_p.a
OLD_FILES+=usr/lib/libmagic_p.a
OLD_FILES+=usr/lib/libmd_p.a
OLD_FILES+=usr/lib/libmemstat_p.a
OLD_FILES+=usr/lib/libmenu_p.a
OLD_FILES+=usr/lib/libmenuw_p.a
OLD_FILES+=usr/lib/libmilter_p.a
OLD_FILES+=usr/lib/libmp_p.a
OLD_FILES+=usr/lib/libncurses_p.a
OLD_FILES+=usr/lib/libncursesw_p.a
OLD_FILES+=usr/lib/libnetgraph_p.a
OLD_FILES+=usr/lib/libngatm_p.a
OLD_FILES+=usr/lib/libopie_p.a
OLD_FILES+=usr/lib/libpanel_p.a
OLD_FILES+=usr/lib/libpanelw_p.a
OLD_FILES+=usr/lib/libpcap_p.a
OLD_FILES+=usr/lib/libpmc_p.a
OLD_FILES+=usr/lib/libpthread_p.a
OLD_FILES+=usr/lib/libradius_p.a
OLD_FILES+=usr/lib/libreadline_p.a
OLD_FILES+=usr/lib/libroken_p.a
OLD_FILES+=usr/lib/librpcsvc_p.a
OLD_FILES+=usr/lib/librt_p.a
OLD_FILES+=usr/lib/libsbuf_p.a
OLD_FILES+=usr/lib/libsdp_p.a
OLD_FILES+=usr/lib/libsmb_p.a
OLD_FILES+=usr/lib/libssl_p.a
OLD_FILES+=usr/lib/libstdc++_p.a
OLD_FILES+=usr/lib/libsupc++_p.a
OLD_FILES+=usr/lib/libtacplus_p.a
OLD_FILES+=usr/lib/libtermcap_p.a
OLD_FILES+=usr/lib/libtermcapw_p.a
OLD_FILES+=usr/lib/libtermlib_p.a
OLD_FILES+=usr/lib/libtermlibw_p.a
OLD_FILES+=usr/lib/libthr_p.a
OLD_FILES+=usr/lib/libthread_db_p.a
OLD_FILES+=usr/lib/libtinfo_p.a
OLD_FILES+=usr/lib/libtinfow_p.a
OLD_FILES+=usr/lib/libufs_p.a
OLD_FILES+=usr/lib/libugidfw_p.a
OLD_FILES+=usr/lib/libusbhid_p.a
OLD_FILES+=usr/lib/libutil_p.a
OLD_FILES+=usr/lib/libvgl_p.a
OLD_FILES+=usr/lib/libwind_p.a
OLD_FILES+=usr/lib/libwrap_p.a
OLD_FILES+=usr/lib/liby_p.a
OLD_FILES+=usr/lib/libypclnt_p.a
OLD_FILES+=usr/lib/libz_p.a
OLD_FILES+=usr/lib/private/libldns_p.a
OLD_FILES+=usr/lib/private/libssh_p.a
.endif
.if ${MK_RCMDS} == no
OLD_FILES+=bin/rcp
OLD_FILES+=etc/periodic/daily/140.clean-rwho
OLD_FILES+=etc/periodic/daily/430.status-rwho
OLD_FILES+=rescue/rcp
OLD_FILES+=usr/bin/rlogin
OLD_FILES+=usr/bin/rsh
OLD_FILES+=usr/bin/ruptime
OLD_FILES+=usr/bin/rwho
OLD_FILES+=usr/libexec/rlogind
OLD_FILES+=usr/libexec/rshd
OLD_FILES+=usr/sbin/rwhod
OLD_FILES+=usr/share/man/man1/rcp.1.gz
OLD_FILES+=usr/share/man/man1/rlogin.1.gz
OLD_FILES+=usr/share/man/man1/rsh.1.gz
OLD_FILES+=usr/share/man/man1/ruptime.1.gz
OLD_FILES+=usr/share/man/man1/rwho.1.gz
OLD_FILES+=usr/share/man/man8/rlogind.8.gz
OLD_FILES+=usr/share/man/man8/rshd.8.gz
OLD_FILES+=usr/share/man/man8/rwhod.8.gz
.endif
.if ${MK_RCS} == no
OLD_FILES+=usr/bin/ci
OLD_FILES+=usr/bin/co
OLD_FILES+=usr/bin/ident
OLD_FILES+=usr/bin/merge
OLD_FILES+=usr/bin/rcs
OLD_FILES+=usr/bin/rcsclean
OLD_FILES+=usr/bin/rcsdiff
OLD_FILES+=usr/bin/rcsfreeze
OLD_FILES+=usr/bin/rcsmerge
OLD_FILES+=usr/bin/rlog
OLD_FILES+=usr/share/man/man1/ci.1.gz
OLD_FILES+=usr/share/man/man1/co.1.gz
OLD_FILES+=usr/share/man/man1/ident.1.gz
OLD_FILES+=usr/share/man/man1/merge.1.gz
OLD_FILES+=usr/share/man/man1/rcs.1.gz
OLD_FILES+=usr/share/man/man1/rcsclean.1.gz
OLD_FILES+=usr/share/man/man1/rcsdiff.1.gz
OLD_FILES+=usr/share/man/man1/rcsfreeze.1.gz
OLD_FILES+=usr/share/man/man1/rcsintro.1.gz
OLD_FILES+=usr/share/man/man1/rcsmerge.1.gz
OLD_FILES+=usr/share/man/man1/rlog.1.gz
OLD_FILES+=usr/share/man/man5/rcsfile.5.gz
.endif
#.if ${MK_RESCUE} == no
# to be filled in or replaced with a special target
#.endif
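# As with MK_MAN above, a special target would likely beat listing every
# crunched hard link individually. A hypothetical sketch (the target name
# "delete-old-rescue" is an assumption, not existing machinery):
# delete-old-rescue: .PHONY
# 	rm -rf ${DESTDIR}/rescue/*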
.if ${MK_ROUTED} == no
OLD_FILES+=sbin/routed
OLD_FILES+=sbin/rtquery
OLD_FILES+=usr/share/man/man8/routed.8.gz
OLD_FILES+=usr/share/man/man8/rtquery.8.gz
.endif
.if ${MK_SENDMAIL} == no
OLD_FILES+=etc/periodic/daily/150.clean-hoststat
OLD_FILES+=etc/periodic/daily/440.status-mailq
OLD_FILES+=etc/periodic/daily/460.status-mail-rejects
OLD_FILES+=etc/periodic/daily/500.queuerun
.if ${MK_MAILWRAPPER} == no
OLD_FILES+=bin/rmail
.endif
OLD_FILES+=usr/bin/vacation
OLD_FILES+=usr/include/libmilter/mfapi.h
OLD_FILES+=usr/include/libmilter/mfdef.h
OLD_DIRS+=usr/include/libmilter
OLD_FILES+=usr/lib/libmilter.a
OLD_FILES+=usr/lib/libmilter.so
OLD_LIBS+=usr/lib/libmilter.so.5
OLD_FILES+=usr/lib/libmilter_p.a
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/libmilter.a
OLD_FILES+=usr/lib32/libmilter.so
OLD_LIBS+=usr/lib32/libmilter.so.5
OLD_FILES+=usr/lib32/libmilter_p.a
.endif
OLD_FILES+=usr/libexec/mail.local
OLD_FILES+=usr/libexec/sendmail/sendmail
OLD_FILES+=usr/libexec/smrsh
OLD_FILES+=usr/sbin/editmap
OLD_FILES+=usr/sbin/mailstats
OLD_FILES+=usr/sbin/makemap
OLD_FILES+=usr/sbin/praliases
OLD_FILES+=usr/share/doc/smm/08.sendmailop/paper.ascii.gz
OLD_DIRS+=usr/share/doc/smm/08.sendmailop
OLD_FILES+=usr/share/man/man1/mailq.1.gz
OLD_FILES+=usr/share/man/man1/newaliases.1.gz
OLD_FILES+=usr/share/man/man1/vacation.1.gz
OLD_FILES+=usr/share/man/man5/aliases.5.gz
OLD_FILES+=usr/share/man/man8/editmap.8.gz
OLD_FILES+=usr/share/man/man8/hoststat.8.gz
OLD_FILES+=usr/share/man/man8/mail.local.8.gz
OLD_FILES+=usr/share/man/man8/mailstats.8.gz
OLD_FILES+=usr/share/man/man8/makemap.8.gz
OLD_FILES+=usr/share/man/man8/praliases.8.gz
OLD_FILES+=usr/share/man/man8/purgestat.8.gz
OLD_FILES+=usr/share/man/man8/rmail.8.gz
OLD_FILES+=usr/share/man/man8/sendmail.8.gz
OLD_FILES+=usr/share/man/man8/smrsh.8.gz
OLD_FILES+=usr/share/sendmail/cf/README
OLD_FILES+=usr/share/sendmail/cf/cf/Makefile
OLD_FILES+=usr/share/sendmail/cf/cf/README
OLD_FILES+=usr/share/sendmail/cf/cf/chez.cs.mc
OLD_FILES+=usr/share/sendmail/cf/cf/clientproto.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cs-hpux10.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cs-hpux9.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cs-osf1.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cs-solaris2.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cs-sunos4.1.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cs-ultrix4.mc
OLD_FILES+=usr/share/sendmail/cf/cf/cyrusproto.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-bsd4.4.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-hpux10.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-hpux9.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-linux.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-mpeix.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-nextstep3.3.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-osf1.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-solaris.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-sunos4.1.mc
OLD_FILES+=usr/share/sendmail/cf/cf/generic-ultrix4.mc
OLD_FILES+=usr/share/sendmail/cf/cf/huginn.cs.mc
OLD_FILES+=usr/share/sendmail/cf/cf/knecht.mc
OLD_FILES+=usr/share/sendmail/cf/cf/mail.cs.mc
OLD_FILES+=usr/share/sendmail/cf/cf/mail.eecs.mc
OLD_FILES+=usr/share/sendmail/cf/cf/mailspool.cs.mc
OLD_FILES+=usr/share/sendmail/cf/cf/python.cs.mc
OLD_FILES+=usr/share/sendmail/cf/cf/s2k-osf1.mc
OLD_FILES+=usr/share/sendmail/cf/cf/s2k-ultrix4.mc
OLD_FILES+=usr/share/sendmail/cf/cf/submit.cf
OLD_FILES+=usr/share/sendmail/cf/cf/submit.mc
OLD_FILES+=usr/share/sendmail/cf/cf/tcpproto.mc
OLD_FILES+=usr/share/sendmail/cf/cf/ucbarpa.mc
OLD_FILES+=usr/share/sendmail/cf/cf/ucbvax.mc
OLD_FILES+=usr/share/sendmail/cf/cf/uucpproto.mc
OLD_FILES+=usr/share/sendmail/cf/cf/vangogh.cs.mc
OLD_DIRS+=usr/share/sendmail/cf/cf
OLD_FILES+=usr/share/sendmail/cf/domain/Berkeley.EDU.m4
OLD_FILES+=usr/share/sendmail/cf/domain/CS.Berkeley.EDU.m4
OLD_FILES+=usr/share/sendmail/cf/domain/EECS.Berkeley.EDU.m4
OLD_FILES+=usr/share/sendmail/cf/domain/S2K.Berkeley.EDU.m4
OLD_FILES+=usr/share/sendmail/cf/domain/berkeley-only.m4
OLD_FILES+=usr/share/sendmail/cf/domain/generic.m4
OLD_DIRS+=usr/share/sendmail/cf/domain
OLD_FILES+=usr/share/sendmail/cf/feature/accept_unqualified_senders.m4
OLD_FILES+=usr/share/sendmail/cf/feature/accept_unresolvable_domains.m4
OLD_FILES+=usr/share/sendmail/cf/feature/access_db.m4
OLD_FILES+=usr/share/sendmail/cf/feature/allmasquerade.m4
OLD_FILES+=usr/share/sendmail/cf/feature/always_add_domain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/authinfo.m4
OLD_FILES+=usr/share/sendmail/cf/feature/badmx.m4
OLD_FILES+=usr/share/sendmail/cf/feature/bestmx_is_local.m4
OLD_FILES+=usr/share/sendmail/cf/feature/bitdomain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/blacklist_recipients.m4
OLD_FILES+=usr/share/sendmail/cf/feature/block_bad_helo.m4
OLD_FILES+=usr/share/sendmail/cf/feature/compat_check.m4
OLD_FILES+=usr/share/sendmail/cf/feature/conncontrol.m4
OLD_FILES+=usr/share/sendmail/cf/feature/delay_checks.m4
OLD_FILES+=usr/share/sendmail/cf/feature/dnsbl.m4
OLD_FILES+=usr/share/sendmail/cf/feature/domaintable.m4
OLD_FILES+=usr/share/sendmail/cf/feature/enhdnsbl.m4
OLD_FILES+=usr/share/sendmail/cf/feature/generics_entire_domain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/genericstable.m4
OLD_FILES+=usr/share/sendmail/cf/feature/greet_pause.m4
OLD_FILES+=usr/share/sendmail/cf/feature/ldap_routing.m4
OLD_FILES+=usr/share/sendmail/cf/feature/limited_masquerade.m4
OLD_FILES+=usr/share/sendmail/cf/feature/local_lmtp.m4
OLD_FILES+=usr/share/sendmail/cf/feature/local_no_masquerade.m4
OLD_FILES+=usr/share/sendmail/cf/feature/local_procmail.m4
OLD_FILES+=usr/share/sendmail/cf/feature/lookupdotdomain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/loose_relay_check.m4
OLD_FILES+=usr/share/sendmail/cf/feature/mailertable.m4
OLD_FILES+=usr/share/sendmail/cf/feature/masquerade_entire_domain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/masquerade_envelope.m4
OLD_FILES+=usr/share/sendmail/cf/feature/msp.m4
OLD_FILES+=usr/share/sendmail/cf/feature/mtamark.m4
OLD_FILES+=usr/share/sendmail/cf/feature/no_default_msa.m4
OLD_FILES+=usr/share/sendmail/cf/feature/nocanonify.m4
OLD_FILES+=usr/share/sendmail/cf/feature/notsticky.m4
OLD_FILES+=usr/share/sendmail/cf/feature/nouucp.m4
OLD_FILES+=usr/share/sendmail/cf/feature/nullclient.m4
OLD_FILES+=usr/share/sendmail/cf/feature/preserve_local_plus_detail.m4
OLD_FILES+=usr/share/sendmail/cf/feature/preserve_luser_host.m4
OLD_FILES+=usr/share/sendmail/cf/feature/promiscuous_relay.m4
OLD_FILES+=usr/share/sendmail/cf/feature/queuegroup.m4
OLD_FILES+=usr/share/sendmail/cf/feature/ratecontrol.m4
OLD_FILES+=usr/share/sendmail/cf/feature/redirect.m4
OLD_FILES+=usr/share/sendmail/cf/feature/relay_based_on_MX.m4
OLD_FILES+=usr/share/sendmail/cf/feature/relay_entire_domain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/relay_hosts_only.m4
OLD_FILES+=usr/share/sendmail/cf/feature/relay_local_from.m4
OLD_FILES+=usr/share/sendmail/cf/feature/relay_mail_from.m4
OLD_FILES+=usr/share/sendmail/cf/feature/require_rdns.m4
OLD_FILES+=usr/share/sendmail/cf/feature/smrsh.m4
OLD_FILES+=usr/share/sendmail/cf/feature/stickyhost.m4
OLD_FILES+=usr/share/sendmail/cf/feature/use_client_ptr.m4
OLD_FILES+=usr/share/sendmail/cf/feature/use_ct_file.m4
OLD_FILES+=usr/share/sendmail/cf/feature/use_cw_file.m4
OLD_FILES+=usr/share/sendmail/cf/feature/uucpdomain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/virtuser_entire_domain.m4
OLD_FILES+=usr/share/sendmail/cf/feature/virtusertable.m4
OLD_DIRS+=usr/share/sendmail/cf/feature
OLD_FILES+=usr/share/sendmail/cf/hack/cssubdomain.m4
OLD_DIRS+=usr/share/sendmail/cf/hack
OLD_FILES+=usr/share/sendmail/cf/m4/cf.m4
OLD_FILES+=usr/share/sendmail/cf/m4/cfhead.m4
OLD_FILES+=usr/share/sendmail/cf/m4/proto.m4
OLD_FILES+=usr/share/sendmail/cf/m4/version.m4
OLD_DIRS+=usr/share/sendmail/cf/m4
OLD_FILES+=usr/share/sendmail/cf/mailer/cyrus.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/cyrusv2.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/fax.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/local.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/mail11.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/phquery.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/pop.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/procmail.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/qpage.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/smtp.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/usenet.m4
OLD_FILES+=usr/share/sendmail/cf/mailer/uucp.m4
OLD_DIRS+=usr/share/sendmail/cf/mailer
OLD_FILES+=usr/share/sendmail/cf/ostype/a-ux.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/aix3.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/aix4.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/aix5.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/altos.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/amdahl-uts.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/bsd4.3.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/bsd4.4.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/bsdi.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/bsdi1.0.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/bsdi2.0.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/darwin.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/dgux.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/domainos.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/dragonfly.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/dynix3.2.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/freebsd4.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/freebsd5.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/freebsd6.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/gnu.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/hpux10.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/hpux11.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/hpux9.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/irix4.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/irix5.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/irix6.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/isc4.1.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/linux.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/maxion.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/mklinux.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/mpeix.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/nextstep.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/openbsd.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/osf1.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/powerux.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/ptx2.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/qnx.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/riscos4.5.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/sco-uw-2.1.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/sco3.2.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/sinix.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/solaris11.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/solaris2.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/solaris2.ml.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/solaris2.pre5.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/solaris8.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/sunos3.5.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/sunos4.1.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/svr4.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/ultrix4.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/unicos.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/unicosmk.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/unicosmp.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/unixware7.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/unknown.m4
OLD_FILES+=usr/share/sendmail/cf/ostype/uxpds.m4
OLD_DIRS+=usr/share/sendmail/cf/ostype
OLD_FILES+=usr/share/sendmail/cf/sendmail.schema
OLD_FILES+=usr/share/sendmail/cf/sh/makeinfo.sh
OLD_DIRS+=usr/share/sendmail/cf/sh
OLD_FILES+=usr/share/sendmail/cf/siteconfig/uucp.cogsci.m4
OLD_FILES+=usr/share/sendmail/cf/siteconfig/uucp.old.arpa.m4
OLD_FILES+=usr/share/sendmail/cf/siteconfig/uucp.ucbarpa.m4
OLD_FILES+=usr/share/sendmail/cf/siteconfig/uucp.ucbvax.m4
OLD_DIRS+=usr/share/sendmail/cf/siteconfig
OLD_DIRS+=usr/share/sendmail/cf
OLD_DIRS+=usr/share/sendmail
.endif
#.if ${MK_SHAREDOCS} == no
# to be filled in
#.endif
#.if ${MK_SYSCONS} == no
# to be filled in
#.endif
.if ${MK_TCSH} == no
OLD_FILES+=bin/csh
OLD_FILES+=bin/tcsh
OLD_FILES+=rescue/csh
OLD_FILES+=rescue/tcsh
OLD_FILES+=usr/share/examples/tcsh/complete.tcsh
OLD_FILES+=usr/share/examples/tcsh/csh-mode.el
OLD_DIRS+=usr/share/examples/tcsh
OLD_FILES+=usr/share/man/man1/csh.1.gz
OLD_FILES+=usr/share/man/man1/tcsh.1.gz
OLD_FILES+=usr/share/nls/de_AT.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/de_AT.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/de_AT.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/de_CH.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/de_CH.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/de_CH.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/de_DE.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/de_DE.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/de_DE.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/el_GR.ISO8859-7/tcsh.cat
OLD_FILES+=usr/share/nls/el_GR.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/es_ES.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/es_ES.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/es_ES.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/et_EE.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/et_EE.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/fi_FI.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/fi_FI.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/fi_FI.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/fr_BE.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/fr_BE.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/fr_BE.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/fr_CA.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/fr_CA.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/fr_CA.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/fr_CH.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/fr_CH.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/fr_CH.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/fr_FR.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/fr_FR.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/fr_FR.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/it_CH.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/it_CH.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/it_CH.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/it_IT.ISO8859-1/tcsh.cat
OLD_FILES+=usr/share/nls/it_IT.ISO8859-15/tcsh.cat
OLD_FILES+=usr/share/nls/it_IT.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/ja_JP.SJIS/tcsh.cat
OLD_FILES+=usr/share/nls/ja_JP.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/ja_JP.eucJP/tcsh.cat
OLD_FILES+=usr/share/nls/ru_RU.CP1251/tcsh.cat
OLD_FILES+=usr/share/nls/ru_RU.CP866/tcsh.cat
OLD_FILES+=usr/share/nls/ru_RU.ISO8859-5/tcsh.cat
OLD_FILES+=usr/share/nls/ru_RU.KOI8-R/tcsh.cat
OLD_FILES+=usr/share/nls/ru_RU.UTF-8/tcsh.cat
OLD_FILES+=usr/share/nls/uk_UA.ISO8859-5/tcsh.cat
OLD_FILES+=usr/share/nls/uk_UA.KOI8-U/tcsh.cat
OLD_FILES+=usr/share/nls/uk_UA.UTF-8/tcsh.cat
.endif
.if ${MK_TELNET} == no
OLD_FILES+=usr/bin/telnet
OLD_FILES+=usr/libexec/telnetd
OLD_FILES+=usr/share/man/man1/telnet.1.gz
OLD_FILES+=usr/share/man/man8/telnetd.8.gz
.endif
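# Note the inverted sense of the next block: it applies when the test suite
# *is* built, removing files (such as the old libatf-c++.so.1) that were
# obsoleted by updates to the installed tests under usr/tests.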
.if ${MK_TESTS} == yes
OLD_LIBS+=usr/lib/libatf-c++.so.1
OLD_FILES+=usr/tests/lib/atf/libatf-c/test_helpers_test
OLD_FILES+=usr/tests/lib/atf/test-programs/fork_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/application_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/detail/expand_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/detail/parser_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/detail/ui_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/env_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/exceptions_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/expand_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/fs_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/parser_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/process_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/sanity_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/text_test
OLD_FILES+=usr/tests/lib/atf/libatf-c++/ui_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/dynstr_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/env_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/fs_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/list_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/map_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/process_helpers
OLD_FILES+=usr/tests/lib/atf/libatf-c/process_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/sanity_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/text_test
OLD_FILES+=usr/tests/lib/atf/libatf-c/user_test
.if ${MK_MAKE} == yes && ${MK_BMAKE} == yes
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.status.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stderr.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/expected.stdout.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd/libtest.a
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.status.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stderr.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/expected.stdout.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod/libtest.a
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.status.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stderr.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.3
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.4
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.5
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.6
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/expected.stdout.7
OLD_FILES+=usr/tests/usr.bin/make/archives/fmt_oldbsd/libtest.a
OLD_FILES+=usr/tests/usr.bin/make/archives/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/basic/t0/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/basic/t0/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/basic/t0/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t0/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t0/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t1/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/basic/t1/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/basic/t1/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/basic/t1/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t1/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t1/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t2/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/basic/t2/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/basic/t2/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/basic/t2/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t2/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t2/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t3/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/basic/t3/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/basic/t3/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t3/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/basic/t3/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/basic/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/execution/ellipsis/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/execution/ellipsis/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/execution/ellipsis/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/execution/ellipsis/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/execution/ellipsis/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/execution/ellipsis/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/execution/empty/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/execution/empty/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/execution/empty/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/execution/empty/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/execution/empty/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/execution/empty/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/execution/joberr/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/execution/joberr/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/execution/joberr/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/execution/joberr/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/execution/joberr/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/execution/joberr/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/execution/plus/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/execution/plus/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/execution/plus/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/execution/plus/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/execution/plus/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/execution/plus/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/execution/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/shell/builtin/sh
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/shell/meta/sh
OLD_FILES+=usr/tests/usr.bin/make/shell/path/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/shell/path/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/path/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/shell/path/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/shell/path/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/shell/path/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/shell/path/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/shell/path/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/shell/path/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/shell/path/sh
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/shell/path_select/shell
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/shell/replace/shell
OLD_FILES+=usr/tests/usr.bin/make/shell/select/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/shell/select/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/shell/select/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/shell/select/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/shell/select/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/shell/select/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/shell/select/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/shell/select/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/shell/select/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/shell/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/TEST1.a
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/basic/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/TEST1.a
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/TEST2.a
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild1/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/TEST1.a
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/TEST2.a
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/src_wild2/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/suffixes/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/syntax/directive-t0/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/syntax/directive-t0/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/syntax/directive-t0/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/syntax/directive-t0/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/directive-t0/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/directive-t0/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.status.3
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.status.4
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.status.5
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stderr.3
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stderr.4
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stderr.5
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stdout.3
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stdout.4
OLD_FILES+=usr/tests/usr.bin/make/syntax/enl/expected.stdout.5
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/funny-targets/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/syntax/semi/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/syntax/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/1/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/1/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/1/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/1/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/1/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/1/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/2/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/mk/sys.mk
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/mk/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t0/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/1/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/1/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/1/cleanup
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/1/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/1/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/1/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/2/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/mk/sys.mk
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/mk/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t1/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/1/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/1/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/1/cleanup
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/1/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/1/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/1/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/2/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/mk/sys.mk
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/mk/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/t2/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/sysmk/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_M/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_M/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_M/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_M/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_M/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_M/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.status.3
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.stderr.3
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/variables/modifier_t/expected.stdout.3
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/expected.status.2
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/expected.stderr.2
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/variables/opt_V/expected.stdout.2
OLD_FILES+=usr/tests/usr.bin/make/variables/t0/legacy_test
OLD_FILES+=usr/tests/usr.bin/make/variables/t0/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/variables/t0/Makefile.test
OLD_FILES+=usr/tests/usr.bin/make/variables/t0/expected.status.1
OLD_FILES+=usr/tests/usr.bin/make/variables/t0/expected.stderr.1
OLD_FILES+=usr/tests/usr.bin/make/variables/t0/expected.stdout.1
OLD_FILES+=usr/tests/usr.bin/make/variables/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/Kyuafile
OLD_FILES+=usr/tests/usr.bin/make/common.sh
OLD_FILES+=usr/tests/usr.bin/make/test-new.mk
OLD_DIRS+=usr/tests/usr.bin/make/variables/t0
OLD_DIRS+=usr/tests/usr.bin/make/variables/opt_V
OLD_DIRS+=usr/tests/usr.bin/make/variables/modifier_t
OLD_DIRS+=usr/tests/usr.bin/make/variables/modifier_M
OLD_DIRS+=usr/tests/usr.bin/make/variables
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t2/mk
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t2/2/1
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t2/2
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t2
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t1/mk
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t1/2/1
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t1/2
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t1
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t0/mk
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t0/2/1
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t0/2
OLD_DIRS+=usr/tests/usr.bin/make/sysmk/t0
OLD_DIRS+=usr/tests/usr.bin/make/sysmk
OLD_DIRS+=usr/tests/usr.bin/make/syntax/semi
OLD_DIRS+=usr/tests/usr.bin/make/syntax/funny-targets
OLD_DIRS+=usr/tests/usr.bin/make/syntax/enl
OLD_DIRS+=usr/tests/usr.bin/make/syntax/directive-t0
OLD_DIRS+=usr/tests/usr.bin/make/syntax
OLD_DIRS+=usr/tests/usr.bin/make/suffixes/src_wild2
OLD_DIRS+=usr/tests/usr.bin/make/suffixes/src_wild1
OLD_DIRS+=usr/tests/usr.bin/make/suffixes/basic
OLD_DIRS+=usr/tests/usr.bin/make/suffixes
OLD_DIRS+=usr/tests/usr.bin/make/shell/select
OLD_DIRS+=usr/tests/usr.bin/make/shell/replace
OLD_DIRS+=usr/tests/usr.bin/make/shell/path_select
OLD_DIRS+=usr/tests/usr.bin/make/shell/path
OLD_DIRS+=usr/tests/usr.bin/make/shell/meta
OLD_DIRS+=usr/tests/usr.bin/make/shell/builtin
OLD_DIRS+=usr/tests/usr.bin/make/shell
OLD_DIRS+=usr/tests/usr.bin/make/execution/plus
OLD_DIRS+=usr/tests/usr.bin/make/execution/joberr
OLD_DIRS+=usr/tests/usr.bin/make/execution/empty
OLD_DIRS+=usr/tests/usr.bin/make/execution/ellipsis
OLD_DIRS+=usr/tests/usr.bin/make/execution
OLD_DIRS+=usr/tests/usr.bin/make/basic/t3
OLD_DIRS+=usr/tests/usr.bin/make/basic/t2
OLD_DIRS+=usr/tests/usr.bin/make/basic/t1
OLD_DIRS+=usr/tests/usr.bin/make/basic/t0
OLD_DIRS+=usr/tests/usr.bin/make/basic
OLD_DIRS+=usr/tests/usr.bin/make/archives/fmt_oldbsd
OLD_DIRS+=usr/tests/usr.bin/make/archives/fmt_44bsd_mod
OLD_DIRS+=usr/tests/usr.bin/make/archives/fmt_44bsd
OLD_DIRS+=usr/tests/usr.bin/make/archives
OLD_DIRS+=usr/tests/usr.bin/make
.endif
.else
# ATF libraries.
OLD_FILES+=usr/bin/atf-sh
OLD_DIRS+=usr/include/atf-c
OLD_FILES+=usr/include/atf-c/build.h
OLD_FILES+=usr/include/atf-c/check.h
OLD_FILES+=usr/include/atf-c/config.h
OLD_FILES+=usr/include/atf-c/defs.h
OLD_FILES+=usr/include/atf-c/error.h
OLD_FILES+=usr/include/atf-c/error_fwd.h
OLD_FILES+=usr/include/atf-c/macros.h
OLD_FILES+=usr/include/atf-c/tc.h
OLD_FILES+=usr/include/atf-c/tp.h
OLD_FILES+=usr/include/atf-c/utils.h
OLD_FILES+=usr/include/atf-c.h
OLD_DIRS+=usr/include/atf-c++
OLD_FILES+=usr/include/atf-c++/build.hpp
OLD_FILES+=usr/include/atf-c++/check.hpp
OLD_FILES+=usr/include/atf-c++/config.hpp
OLD_FILES+=usr/include/atf-c++/macros.hpp
OLD_FILES+=usr/include/atf-c++/tests.hpp
OLD_FILES+=usr/include/atf-c++/utils.hpp
OLD_FILES+=usr/include/atf-c++.hpp
OLD_FILES+=usr/lib/libatf-c_p.a
OLD_FILES+=usr/lib/libatf-c.so.1
OLD_FILES+=usr/lib/libatf-c.so
OLD_FILES+=usr/lib/libatf-c++.a
OLD_FILES+=usr/lib/libatf-c++_p.a
OLD_FILES+=usr/lib/libatf-c++.so.1
OLD_FILES+=usr/lib/libatf-c++.so
OLD_FILES+=usr/lib/libatf-c.a
OLD_FILES+=usr/libexec/atf-check
OLD_DIRS+=usr/share/atf
OLD_FILES+=usr/share/atf/libatf-sh.subr
OLD_DIRS+=usr/share/doc/atf
OLD_FILES+=usr/share/doc/atf/AUTHORS
OLD_FILES+=usr/share/doc/atf/COPYING
OLD_FILES+=usr/share/doc/atf/NEWS
OLD_FILES+=usr/share/doc/atf/README
OLD_FILES+=usr/share/man/man1/atf-check.1.gz
OLD_FILES+=usr/share/man/man1/atf-sh.1.gz
OLD_FILES+=usr/share/man/man1/atf-test-program.1.gz
OLD_FILES+=usr/share/man/man3/atf-c-api.3.gz
OLD_FILES+=usr/share/man/man3/atf-c++-api.3.gz
OLD_FILES+=usr/share/man/man3/atf-sh-api.3.gz
OLD_FILES+=usr/share/man/man4/atf-test-case.4.gz
OLD_FILES+=usr/share/mk/atf.test.mk
# Test suite.
. if exists(${DESTDIR}/usr/tests/)
TESTS_DIRS!=find ${DESTDIR}/usr/tests -type d | sed -e 's,^${DESTDIR}/,,'; echo
OLD_DIRS+=${TESTS_DIRS}
TESTS_FILES!=find ${DESTDIR}/usr/tests \! -type d | sed -e 's,^${DESTDIR}/,,'; echo
OLD_FILES+=${TESTS_FILES}
. endif
.endif # Test suite.
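# Note on the test-suite sweep above: "!=" assignments run their shell
# command at makefile parse time, so TESTS_DIRS and TESTS_FILES are
# computed from whatever is currently installed under
# ${DESTDIR}/usr/tests, and the sed strips the ${DESTDIR} prefix so the
# entries match the relative paths used throughout this file. A minimal
# sketch of the resulting expansion (paths illustrative only):
#   TESTS_FILES = usr/tests/Kyuafile usr/tests/lib/... usr/tests/usr.bin/...
#   OLD_FILES  += ${TESTS_FILES}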
#.if ${MK_TOOLCHAIN} == no
# to be filled in
#.endif
.if ${MK_UNBOUND} == no
OLD_FILES+=etc/rc.d/local_unbound
OLD_FILES+=usr/lib/private/libunbound.a
OLD_FILES+=usr/lib/private/libunbound.so
OLD_LIBS+=usr/lib/private/libunbound.so.5
OLD_FILES+=usr/lib/private/libunbound_p.a
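# amd64 and powerpc64 also build 32-bit compatibility libraries, so the
# usr/lib32 copies of libunbound need matching entries.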
.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
OLD_FILES+=usr/lib32/private/libunbound.a
OLD_FILES+=usr/lib32/private/libunbound.so
OLD_LIBS+=usr/lib32/private/libunbound.so.5
OLD_FILES+=usr/lib32/private/libunbound_p.a
.endif
OLD_FILES+=usr/sbin/local-unbound-setup
OLD_FILES+=usr/sbin/unbound
OLD_FILES+=usr/sbin/unbound-anchor
OLD_FILES+=usr/sbin/unbound-checkconf
OLD_FILES+=usr/sbin/unbound-control
OLD_FILES+=usr/sbin/unbound-control-setup
.endif
#.if ${MK_USB} == no
# to be filled in
#.endif
.if ${MK_UTMPX} == no
OLD_FILES+=etc/periodic/monthly/200.accounting
OLD_FILES+=usr/bin/last
OLD_FILES+=usr/bin/users
OLD_FILES+=usr/bin/who
OLD_FILES+=usr/sbin/ac
OLD_FILES+=usr/sbin/lastlogin
OLD_FILES+=usr/sbin/utx
OLD_FILES+=usr/share/man/man1/last.1.gz
OLD_FILES+=usr/share/man/man1/users.1.gz
OLD_FILES+=usr/share/man/man1/who.1.gz
OLD_FILES+=usr/share/man/man8/ac.8.gz
OLD_FILES+=usr/share/man/man8/lastlogin.8.gz
OLD_FILES+=usr/share/man/man8/utx.8.gz
.endif
.if ${MK_WIRELESS} == no
OLD_FILES+=etc/regdomain.xml
OLD_FILES+=usr/sbin/ancontrol
OLD_FILES+=usr/sbin/hostapd
OLD_FILES+=usr/sbin/hostapd_cli
OLD_FILES+=usr/sbin/ndis_events
OLD_FILES+=usr/sbin/wlandebug
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/sbin/wlconfig
.endif
OLD_FILES+=usr/sbin/wpa_cli
OLD_FILES+=usr/sbin/wpa_passphrase
OLD_FILES+=usr/sbin/wpa_supplicant
OLD_FILES+=usr/share/examples/etc/regdomain.xml
OLD_FILES+=usr/share/examples/etc/wpa_supplicant.conf
OLD_FILES+=usr/share/examples/hostapd/hostapd.conf
OLD_FILES+=usr/share/examples/hostapd/hostapd.eap_user
OLD_FILES+=usr/share/examples/hostapd/hostapd.wpa_psk
OLD_DIRS+=usr/share/examples/hostapd
OLD_FILES+=usr/share/man/man5/hostapd.conf.5.gz
OLD_FILES+=usr/share/man/man5/wpa_supplicant.conf.5.gz
OLD_FILES+=usr/share/man/man8/ancontrol.8.gz
OLD_FILES+=usr/share/man/man8/hostapd.8.gz
OLD_FILES+=usr/share/man/man8/hostapd_cli.8.gz
.if ${TARGET_ARCH} == "i386"
OLD_FILES+=usr/share/man/man8/i386/wlconfig.8.gz
.endif
OLD_FILES+=usr/share/man/man8/ndis_events.8.gz
OLD_FILES+=usr/share/man/man8/wlandebug.8.gz
OLD_FILES+=usr/share/man/man8/wpa_cli.8.gz
OLD_FILES+=usr/share/man/man8/wpa_passphrase.8.gz
OLD_FILES+=usr/share/man/man8/wpa_supplicant.8.gz
.endif
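# svnlite's binaries are stale both when the lite build is disabled and
# when the full Subversion build (MK_SVN) replaces them with the
# unprefixed svn* names, hence the two-sided condition below.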
.if ${MK_SVNLITE} == no || ${MK_SVN} == yes
OLD_FILES+=usr/bin/svnlite
OLD_FILES+=usr/bin/svnliteadmin
OLD_FILES+=usr/bin/svnlitedumpfilter
OLD_FILES+=usr/bin/svnlitelook
OLD_FILES+=usr/bin/svnlitemucc
OLD_FILES+=usr/bin/svnliterdump
OLD_FILES+=usr/bin/svnliteserve
OLD_FILES+=usr/bin/svnlitesync
OLD_FILES+=usr/bin/svnliteversion
.endif
.if ${MK_SVN} == no
OLD_FILES+=usr/bin/svn
OLD_FILES+=usr/bin/svnadmin
OLD_FILES+=usr/bin/svndumpfilter
OLD_FILES+=usr/bin/svnlook
OLD_FILES+=usr/bin/svnmucc
OLD_FILES+=usr/bin/svnrdump
OLD_FILES+=usr/bin/svnserve
OLD_FILES+=usr/bin/svnsync
OLD_FILES+=usr/bin/svnversion
.endif
.if ${MK_DMAGENT} == no
OLD_FILES+=usr/libexec/dma
OLD_FILES+=usr/libexec/dma-mbox-create
OLD_FILES+=usr/share/man/man8/dma.8.gz
OLD_FILES+=usr/share/examples/dma/mailer.conf
.endif
