Index: stable/11/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c =================================================================== --- stable/11/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c (revision 326301) +++ stable/11/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c (revision 326302) @@ -1,1983 +1,1991 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" #define ELF_TARGET_ALL #include #include #ifdef illumos #include #else #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #endif #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #ifdef illumos #include #else #include #include #include #include #endif #include #include #include #include #include #include /* Indices of the section headers in the generated ELF file. */ #define ESHDR_NULL 0 #define ESHDR_SHSTRTAB 1 #define ESHDR_DOF 2 #define ESHDR_STRTAB 3 #define ESHDR_SYMTAB 4 #define ESHDR_REL 5 #define ESHDR_NUM 6 /* * Seek to the file offset recorded in section header `index' and write that * section's data there. Evaluates to non-zero if the seek or the write fails. * Relies on `fd', `dtp' and `elf_file' being in scope where it is expanded. */ #define PWRITE_SCN(index, data) \ (lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset, SEEK_SET) != \ (off64_t)elf_file.shdr[(index)].sh_offset || \ dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) != \ elf_file.shdr[(index)].sh_size) /* * Section header string tables. The number after each name is that name's * byte offset within the table. */ static const char DTRACE_SHSTRTAB32[] = "\0" ".shstrtab\0" /* 1 */ ".SUNW_dof\0" /* 11 */ ".strtab\0" /* 21 */ ".symtab\0" /* 29 */ #ifdef __sparc ".rela.SUNW_dof"; /* 37 */ #else ".rel.SUNW_dof"; /* 37 */ #endif static const char DTRACE_SHSTRTAB64[] = "\0" ".shstrtab\0" /* 1 */ ".SUNW_dof\0" /* 11 */ ".strtab\0" /* 21 */ ".symtab\0" /* 29 */ ".rela.SUNW_dof"; /* 37 */ static const char DOFSTR[] = "__SUNW_dof"; static const char DOFLAZYSTR[] = "___SUNW_dof"; typedef struct dt_link_pair { struct dt_link_pair *dlp_next; /* next pair in linked list */ void *dlp_str; /* buffer for string table */ void *dlp_sym; /* buffer for symbol table */ } dt_link_pair_t; typedef struct dof_elf32 { uint32_t de_nrel; /* relocation count */ #ifdef __sparc Elf32_Rela *de_rel; /* array of relocations for sparc */ #else Elf32_Rel *de_rel; /* array of relocations for other ISAs */ #endif uint32_t de_nsym; /* symbol count */ Elf32_Sym *de_sym; /* array of symbols */ uint32_t de_strlen; /* size of string table */ char *de_strtab; /* string table */ uint32_t de_global; /* index of the first global symbol */ } dof_elf32_t; /* * Build the ELF32 relocation, symbol and string tables for the given DOF. * Every relocation entry found through a DOF_SECT_URELHDR section yields one * relocation and one undefined global function symbol; a final symbol names * the DOF itself. On success the calloc()ed tables are stored in *dep and 0 * is returned. If an allocation fails, any tables already allocated are * freed and the result of dt_set_errno(dtp, EDT_NOMEM) is returned. */ static int prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep) { dof_sec_t 
*dofs, *s; dof_relohdr_t *dofrh; dof_relodesc_t *dofr; char *strtab; int i, j, nrel; size_t strtabsz = 1; uint32_t count = 0; size_t base; Elf32_Sym *sym; #ifdef __sparc Elf32_Rela *rel; #else Elf32_Rel *rel; #endif /*LINTED*/ dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); /* * First compute the size of the string table and the number of * relocations present in the DOF. */ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; assert(strtab[0] == '\0'); strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); count += s->dofs_size / s->dofs_entsize; } dep->de_strlen = strtabsz; dep->de_nrel = count; dep->de_nsym = count + 1; /* the first symbol is always null */ if (dtp->dt_lazyload) { dep->de_strlen += sizeof (DOFLAZYSTR); dep->de_nsym++; } else { dep->de_strlen += sizeof (DOFSTR); dep->de_nsym++; } if ((dep->de_rel = calloc(dep->de_nrel, sizeof (dep->de_rel[0]))) == NULL) { return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) { free(dep->de_rel); return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { free(dep->de_rel); free(dep->de_sym); return (dt_set_errno(dtp, EDT_NOMEM)); } count = 0; strtabsz = 1; dep->de_strtab[0] = '\0'; rel = dep->de_rel; sym = dep->de_sym; dep->de_global = 1; /* * The first symbol table entry must be zeroed and is always ignored. */ bzero(sym, sizeof (Elf32_Sym)); sym++; /* * Take a second pass through the DOF sections filling in the * memory we allocated. 
*/ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; /* * The leading NUL is skipped, so only dofs_size - 1 bytes of this * string table remain to be copied; this matches the strtabsz * advance below and avoids reading one byte past the section. */ bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size - 1); base = strtabsz; strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); nrel = s->dofs_size / s->dofs_entsize; s = &dofs[dofrh->dofr_tgtsec]; for (j = 0; j < nrel; j++) { #if defined(__aarch64__) /* XXX */ -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): aarch64 not implemented\n", + __FUNCTION__, __FILE__, __LINE__); #elif defined(__arm__) /* XXX */ -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): arm not implemented\n", + __FUNCTION__, __FILE__, __LINE__); #elif defined(__i386) || defined(__amd64) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF32_R_INFO(count + dep->de_global, R_386_32); #elif defined(__mips__) /* XXX */ -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): MIPS not implemented\n", + __FUNCTION__, __FILE__, __LINE__); #elif defined(__powerpc__) /* * Add 4 bytes to hit the low half of this 64-bit * big-endian address. */ rel->r_offset = s->dofs_offset + dofr[j].dofr_offset + 4; rel->r_info = ELF32_R_INFO(count + dep->de_global, R_PPC_REL32); #elif defined(__riscv__) /* XXX */ -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): RISC-V not implemented\n", + __FUNCTION__, __FILE__, __LINE__); #elif defined(__sparc) /* * Add 4 bytes to hit the low half of this 64-bit * big-endian address. 
*/ rel->r_offset = s->dofs_offset + dofr[j].dofr_offset + 4; rel->r_info = ELF32_R_INFO(count + dep->de_global, R_SPARC_32); #else #error unknown ISA #endif sym->st_name = base + dofr[j].dofr_name - 1; sym->st_value = 0; sym->st_size = 0; sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); sym->st_other = 0; sym->st_shndx = SHN_UNDEF; rel++; sym++; count++; } } /* * Add a symbol for the DOF itself. We use a different symbol for * lazily and actively loaded DOF to make them easy to distinguish. */ sym->st_name = strtabsz; sym->st_value = 0; sym->st_size = dof->dofh_filesz; sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT); #ifdef illumos sym->st_other = 0; #else sym->st_other = ELF32_ST_VISIBILITY(STV_HIDDEN); #endif sym->st_shndx = ESHDR_DOF; sym++; if (dtp->dt_lazyload) { bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, sizeof (DOFLAZYSTR)); strtabsz += sizeof (DOFLAZYSTR); } else { bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); strtabsz += sizeof (DOFSTR); } assert(count == dep->de_nrel); assert(strtabsz == dep->de_strlen); return (0); } typedef struct dof_elf64 { uint32_t de_nrel; Elf64_Rela *de_rel; uint32_t de_nsym; Elf64_Sym *de_sym; uint32_t de_strlen; char *de_strtab; uint32_t de_global; } dof_elf64_t; static int prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep) { dof_sec_t *dofs, *s; dof_relohdr_t *dofrh; dof_relodesc_t *dofr; char *strtab; int i, j, nrel; size_t strtabsz = 1; #ifdef illumos uint32_t count = 0; #else uint64_t count = 0; #endif size_t base; Elf64_Sym *sym; Elf64_Rela *rel; /*LINTED*/ dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); /* * First compute the size of the string table and the number of * relocations present in the DOF. 
*/ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; assert(strtab[0] == '\0'); strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); count += s->dofs_size / s->dofs_entsize; } dep->de_strlen = strtabsz; dep->de_nrel = count; dep->de_nsym = count + 1; /* the first symbol is always null */ if (dtp->dt_lazyload) { dep->de_strlen += sizeof (DOFLAZYSTR); dep->de_nsym++; } else { dep->de_strlen += sizeof (DOFSTR); dep->de_nsym++; } if ((dep->de_rel = calloc(dep->de_nrel, sizeof (dep->de_rel[0]))) == NULL) { return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) { free(dep->de_rel); return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { free(dep->de_rel); free(dep->de_sym); return (dt_set_errno(dtp, EDT_NOMEM)); } count = 0; strtabsz = 1; dep->de_strtab[0] = '\0'; rel = dep->de_rel; sym = dep->de_sym; dep->de_global = 1; /* * The first symbol table entry must be zeroed and is always ignored. */ bzero(sym, sizeof (Elf64_Sym)); sym++; /* * Take a second pass through the DOF sections filling in the * memory we allocated. 
*/ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); base = strtabsz; strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); nrel = s->dofs_size / s->dofs_entsize; s = &dofs[dofrh->dofr_tgtsec]; for (j = 0; j < nrel; j++) { #if defined(__aarch64__) /* XXX */ #elif defined(__arm__) /* XXX */ #elif defined(__mips__) /* XXX */ #elif defined(__powerpc__) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF64_R_INFO(count + dep->de_global, R_PPC64_REL64); #elif defined(__riscv__) /* XXX */ #elif defined(__i386) || defined(__amd64) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; #ifdef illumos rel->r_info = ELF64_R_INFO(count + dep->de_global, R_AMD64_64); #else rel->r_info = ELF64_R_INFO(count + dep->de_global, R_X86_64_RELATIVE); #endif #elif defined(__sparc) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF64_R_INFO(count + dep->de_global, R_SPARC_64); #else #error unknown ISA #endif sym->st_name = base + dofr[j].dofr_name - 1; sym->st_value = 0; sym->st_size = 0; sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC); sym->st_other = 0; sym->st_shndx = SHN_UNDEF; rel++; sym++; count++; } } /* * Add a symbol for the DOF itself. We use a different symbol for * lazily and actively loaded DOF to make them easy to distinguish. 
*/ sym->st_name = strtabsz; sym->st_value = 0; sym->st_size = dof->dofh_filesz; sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT); #ifdef illumos sym->st_other = 0; #else sym->st_other = ELF64_ST_VISIBILITY(STV_HIDDEN); #endif sym->st_shndx = ESHDR_DOF; sym++; if (dtp->dt_lazyload) { bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, sizeof (DOFLAZYSTR)); strtabsz += sizeof (DOFLAZYSTR); } else { bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); strtabsz += sizeof (DOFSTR); } /* Both passes must agree on the relocation count and string table size. */ assert(count == dep->de_nrel); assert(strtabsz == dep->de_strlen); return (0); } /* * Write a relocatable ELF32 object to fd: the ELF header and section * headers, along with the section header string table, the DOF data, the * string and symbol tables built by prepare_elf32() and, when there are * relocations, the relocation table. The prepared tables are freed before * returning. Returns 0 on success, -1 if prepare_elf32() fails, or the * result of dt_set_errno(dtp, errno) if a write fails. */ static int dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) { struct { Elf32_Ehdr ehdr; Elf32_Shdr shdr[ESHDR_NUM]; } elf_file; Elf32_Shdr *shp; Elf32_Off off; dof_elf32_t de; int ret = 0; uint_t nshdr; if (prepare_elf32(dtp, dof, &de) != 0) return (-1); /* errno is set for us */ /* * If there are no relocations, the relocation section is omitted, * so only the section headers up to and including the symbol * table are needed. */ nshdr = de.de_nrel == 0 ? 
ESHDR_SYMTAB + 1 : ESHDR_NUM; bzero(&elf_file, sizeof (elf_file)); elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32; #if BYTE_ORDER == _BIG_ENDIAN elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #else elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #endif #if defined(__FreeBSD__) elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; #endif elf_file.ehdr.e_type = ET_REL; #if defined(__arm__) elf_file.ehdr.e_machine = EM_ARM; #elif defined(__mips__) elf_file.ehdr.e_machine = EM_MIPS; #elif defined(__powerpc__) elf_file.ehdr.e_machine = EM_PPC; #elif defined(__sparc) elf_file.ehdr.e_machine = EM_SPARC; #elif defined(__i386) || defined(__amd64) elf_file.ehdr.e_machine = EM_386; #endif elf_file.ehdr.e_version = EV_CURRENT; elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr); elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr); elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr); elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr); elf_file.ehdr.e_shnum = nshdr; elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr); shp = &elf_file.shdr[ESHDR_SHSTRTAB]; shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */ shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = sizeof (DTRACE_SHSTRTAB32); shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_DOF]; shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SUNW_dof; shp->sh_offset = off; shp->sh_size = dof->dofh_filesz; shp->sh_addralign = 8; off = shp->sh_offset + shp->sh_size; shp = &elf_file.shdr[ESHDR_STRTAB]; shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = de.de_strlen; 
shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); shp = &elf_file.shdr[ESHDR_SYMTAB]; shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SYMTAB; shp->sh_entsize = sizeof (Elf32_Sym); shp->sh_link = ESHDR_STRTAB; shp->sh_offset = off; shp->sh_info = de.de_global; shp->sh_size = de.de_nsym * sizeof (Elf32_Sym); shp->sh_addralign = 4; off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); if (de.de_nrel == 0) { if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } else { shp = &elf_file.shdr[ESHDR_REL]; shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */ shp->sh_flags = SHF_ALLOC; #ifdef __sparc shp->sh_type = SHT_RELA; #else shp->sh_type = SHT_REL; #endif shp->sh_entsize = sizeof (de.de_rel[0]); shp->sh_link = ESHDR_SYMTAB; shp->sh_info = ESHDR_DOF; shp->sh_offset = off; shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); shp->sh_addralign = 4; if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_REL, de.de_rel) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } free(de.de_strtab); free(de.de_sym); free(de.de_rel); return (ret); } /* * Write out an ELF64 file prologue consisting of a header, section headers, * and a section header string table. The DOF data will follow this prologue * and complete the contents of the given ELF file. 
*/ static int dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) { struct { Elf64_Ehdr ehdr; Elf64_Shdr shdr[ESHDR_NUM]; } elf_file; Elf64_Shdr *shp; Elf64_Off off; dof_elf64_t de; int ret = 0; uint_t nshdr; if (prepare_elf64(dtp, dof, &de) != 0) return (-1); /* errno is set for us */ /* * If there are no relocations, the relocation section is omitted, * so only the section headers up to and including the symbol * table are needed. */ nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM; bzero(&elf_file, sizeof (elf_file)); elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64; #if BYTE_ORDER == _BIG_ENDIAN elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #else elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #endif #if defined(__FreeBSD__) elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; #endif elf_file.ehdr.e_type = ET_REL; /* * NOTE(review): there is no branch below for __aarch64__ or __riscv__, * so on those ISAs e_machine keeps the zero left by the bzero() above. */ #if defined(__arm__) elf_file.ehdr.e_machine = EM_ARM; #elif defined(__mips__) elf_file.ehdr.e_machine = EM_MIPS; #elif defined(__powerpc64__) elf_file.ehdr.e_machine = EM_PPC64; #elif defined(__sparc) elf_file.ehdr.e_machine = EM_SPARCV9; #elif defined(__i386) || defined(__amd64) elf_file.ehdr.e_machine = EM_AMD64; #endif elf_file.ehdr.e_version = EV_CURRENT; elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr); elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr); elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr); elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr); elf_file.ehdr.e_shnum = nshdr; elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr); shp = &elf_file.shdr[ESHDR_SHSTRTAB]; shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */ shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = sizeof (DTRACE_SHSTRTAB64); shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_DOF]; shp->sh_name 
= 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SUNW_dof; shp->sh_offset = off; shp->sh_size = dof->dofh_filesz; shp->sh_addralign = 8; off = shp->sh_offset + shp->sh_size; shp = &elf_file.shdr[ESHDR_STRTAB]; shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = de.de_strlen; shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_SYMTAB]; shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SYMTAB; shp->sh_entsize = sizeof (Elf64_Sym); shp->sh_link = ESHDR_STRTAB; shp->sh_offset = off; shp->sh_info = de.de_global; shp->sh_size = de.de_nsym * sizeof (Elf64_Sym); shp->sh_addralign = 8; off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); if (de.de_nrel == 0) { if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } else { shp = &elf_file.shdr[ESHDR_REL]; shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rela.SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_RELA; shp->sh_entsize = sizeof (de.de_rel[0]); shp->sh_link = ESHDR_SYMTAB; shp->sh_info = ESHDR_DOF; shp->sh_offset = off; shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); shp->sh_addralign = 8; if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_REL, de.de_rel) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } /* Release the tables allocated by prepare_elf64(). */ free(de.de_strtab); free(de.de_sym); free(de.de_rel); return (ret); } /* * Search the first nsym entries of data_sym for an STT_FUNC symbol whose * address range [start, start + st_size) contains addr. Normally start is * the symbol's st_value and the symbol must also belong to section shn. * When uses_funcdesc is set, the section check is skipped and st_value is * instead used as an offset into the raw data of the symbol's own section; * the 64-bit word stored there is taken as start. A global match returns 0 * at once with *sym filled in; otherwise *sym is set to the last non-global * match seen and 0 is returned. Returns -1 if nothing matches. */ static int dt_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint_t 
shn, GElf_Sym *sym, int uses_funcdesc, Elf *elf) { int i, ret = -1; Elf64_Addr symval; Elf_Scn *opd_scn; Elf_Data *opd_desc; GElf_Sym s; for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) { if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) { symval = sym->st_value; if (uses_funcdesc) { opd_scn = elf_getscn(elf, sym->st_shndx); opd_desc = elf_rawdata(opd_scn, NULL); symval = *(uint64_t*)((char *)opd_desc->d_buf + symval); } if ((uses_funcdesc || shn == sym->st_shndx) && symval <= addr && addr < symval + sym->st_size) { if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL) return (0); ret = 0; s = *sym; } } } if (ret == 0) *sym = s; return (ret); } #if defined(__aarch64__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): aarch64 not implemented\n", __FUNCTION__, __FILE__, + __LINE__); return (0); } #elif defined(__arm__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): arm not implemented\n", __FUNCTION__, __FILE__, + __LINE__); return (0); } #elif defined(__mips__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): MIPS not implemented\n", __FUNCTION__, __FILE__, + __LINE__); return (0); } #elif defined(__powerpc__) /* The sentinel is 'xor r3,r3,r3'. */ #define DT_OP_XOR_R3 0x7c631a78 #define DT_OP_NOP 0x60000000 #define DT_OP_BLR 0x4e800020 /* This captures all forms of branching to address. 
*/ #define DT_IS_BRANCH(inst) ((inst & 0xfc000000) == 0x48000000) #define DT_IS_BL(inst) (DT_IS_BRANCH(inst) && (inst & 0x01)) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint32_t *ip; if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) return (-1); /*LINTED*/ ip = (uint32_t *)(p + rela->r_offset); /* * We only know about some specific relocation types. */ if (GELF_R_TYPE(rela->r_info) != R_PPC_REL24 && GELF_R_TYPE(rela->r_info) != R_PPC_PLTREL24) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install below. */ if (isenabled) { if (ip[0] == DT_OP_XOR_R3) { (*off) += sizeof (ip[0]); return (0); } } else { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } /* * We only expect branch to address instructions. */ if (!DT_IS_BRANCH(ip[0])) { dt_dprintf("found %x instead of a branch instruction at %llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } if (isenabled) { /* * It would necessarily indicate incorrect usage if an is- * enabled probe were tail-called so flag that as an error. * It's also potentially (very) tricky to handle gracefully, * but could be done if this were a desired use scenario. 
*/ if (!DT_IS_BL(ip[0])) { dt_dprintf("tail call to is-enabled probe at %llx\n", (u_longlong_t)rela->r_offset); return (-1); } ip[0] = DT_OP_XOR_R3; (*off) += sizeof (ip[0]); } else { if (DT_IS_BL(ip[0])) ip[0] = DT_OP_NOP; else ip[0] = DT_OP_BLR; } return (0); } #elif defined(__riscv__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { -printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + printf("%s:%s(%d): RISC-V implementation required\n", __FUNCTION__, + __FILE__, __LINE__); return (0); } #elif defined(__sparc) #define DT_OP_RET 0x81c7e008 #define DT_OP_NOP 0x01000000 #define DT_OP_CALL 0x40000000 #define DT_OP_CLR_O0 0x90102000 #define DT_IS_MOV_O7(inst) (((inst) & 0xffffe000) == 0x9e100000) #define DT_IS_RESTORE(inst) (((inst) & 0xc1f80000) == 0x81e80000) #define DT_IS_RETL(inst) (((inst) & 0xfff83fff) == 0x81c02008) #define DT_RS2(inst) ((inst) & 0x1f) #define DT_MAKE_RETL(reg) (0x81c02008 | ((reg) << 14)) /*ARGSUSED*/ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint32_t *ip; if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) return (-1); /*LINTED*/ ip = (uint32_t *)(p + rela->r_offset); /* * We only know about some specific relocation types. */ if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 && GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install below. */ if (isenabled) { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } else { if (DT_IS_RESTORE(ip[1])) { if (ip[0] == DT_OP_RET) { (*off) += sizeof (ip[0]); return (0); } } else if (DT_IS_MOV_O7(ip[1])) { if (DT_IS_RETL(ip[0])) return (0); } else { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } } /* * We only expect call instructions with a displacement of 0. 
*/ if (ip[0] != DT_OP_CALL) { dt_dprintf("found %x instead of a call instruction at %llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } if (isenabled) { /* * It would necessarily indicate incorrect usage if an is- * enabled probe were tail-called so flag that as an error. * It's also potentially (very) tricky to handle gracefully, * but could be done if this were a desired use scenario. */ if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) { dt_dprintf("tail call to is-enabled probe at %llx\n", (u_longlong_t)rela->r_offset); return (-1); } /* * On SPARC, we take advantage of the fact that the first * argument shares the same register as for the return value. * The macro handles the work of zeroing that register so we * don't need to do anything special here. We instrument the * instruction in the delay slot as we'll need to modify the * return register after that instruction has been emulated. */ ip[0] = DT_OP_NOP; (*off) += sizeof (ip[0]); } else { /* * If the call is followed by a restore, it's a tail call so * change the call to a ret. If the call is followed by a mov * of a register into %o7, it's a tail call in leaf context * so change the call to a retl-like instruction that returns * to that register value + 8 (rather than the typical %o7 + * 8); the delay slot instruction is left, but should have no * effect. Otherwise we change the call to be a nop. We * identify the subsequent instruction as the probe point in * all but the leaf tail-call case to ensure that arguments to * the probe are complete and consistent. An astute, though * largely hypothetical, observer would note that there is the * possibility of a false-positive probe firing if the function * contained a branch to the instruction in the delay slot of * the call. Fixing this would require significant in-kernel * modifications, and isn't worth doing until we see it in the * wild. 
*/ if (DT_IS_RESTORE(ip[1])) { ip[0] = DT_OP_RET; (*off) += sizeof (ip[0]); } else if (DT_IS_MOV_O7(ip[1])) { ip[0] = DT_MAKE_RETL(DT_RS2(ip[1])); } else { ip[0] = DT_OP_NOP; (*off) += sizeof (ip[0]); } } return (0); } #elif defined(__i386) || defined(__amd64) #define DT_OP_NOP 0x90 #define DT_OP_RET 0xc3 #define DT_OP_CALL 0xe8 #define DT_OP_JMP32 0xe9 #define DT_OP_REX_RAX 0x48 #define DT_OP_XOR_EAX_0 0x33 #define DT_OP_XOR_EAX_1 0xc0 /* * x86 variant of dt_modtext(): rewrite, in the text buffer p, the five-byte * call or jmp instruction whose operand is named by the relocation rela, and * adjust *off to the probe offset to record. Returns 0 if the instruction * was rewritten or already holds the expected replacement sequence, and -1 * for an unrecognized relocation type or an unexpected instruction. */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1); uint8_t ret; /* * On x86, the first byte of the instruction is the call opcode and * the next four bytes are the 32-bit address; the relocation is for * the address operand. We back up the offset to the first byte of * the instruction. For is-enabled probes, we later advance the offset * so that it hits the first nop in the instruction sequence. */ (*off) -= 1; /* * We only know about some specific relocation types. Luckily * these types have the same values on both 32-bit and 64-bit * x86 architectures. */ if (GELF_R_TYPE(rela->r_info) != R_386_PC32 && GELF_R_TYPE(rela->r_info) != R_386_PLT32) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install. For is-enabled probes, we advance the * offset to the first nop instruction in the sequence to match the * text modification code below. 
*/ if (!isenabled) { if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) && ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP && ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) return (0); } else if (dtp->dt_oflags & DTRACE_O_LP64) { if (ip[0] == DT_OP_REX_RAX && ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 && (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) && ip[4] == DT_OP_NOP) { (*off) += 3; return (0); } } else { if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 && (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) && ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) { (*off) += 2; return (0); } } /* * We expect either a call instruction with a 32-bit displacement or a * jmp instruction with a 32-bit displacement acting as a tail-call. */ if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) { dt_dprintf("found %x instead of a call or jmp instruction at " "%llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } /* A tail-call jmp must still return to the caller, so it gets a ret. */ ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP; /* * Establish the instruction sequence -- all nops for probes, and an * instruction to clear the return value register (%eax/%rax) followed * by nops for is-enabled probes; in either case the first nop is * replaced by a ret when the original instruction was a jmp. For * is-enabled probes, we advance the offset to the first nop. This * isn't strictly necessary but makes for more readable disassembly * when the probe is enabled. */ if (!isenabled) { ip[0] = ret; ip[1] = DT_OP_NOP; ip[2] = DT_OP_NOP; ip[3] = DT_OP_NOP; ip[4] = DT_OP_NOP; } else if (dtp->dt_oflags & DTRACE_O_LP64) { ip[0] = DT_OP_REX_RAX; ip[1] = DT_OP_XOR_EAX_0; ip[2] = DT_OP_XOR_EAX_1; ip[3] = ret; ip[4] = DT_OP_NOP; (*off) += 3; } else { ip[0] = DT_OP_XOR_EAX_0; ip[1] = DT_OP_XOR_EAX_1; ip[2] = ret; ip[3] = DT_OP_NOP; ip[4] = DT_OP_NOP; (*off) += 2; } return (0); } #else #error unknown ISA #endif /* * Common failure exit for the link code: record the formatted error message * with dt_set_errmsg(), end the ELF handle if elf is non-NULL, close fd if * it is non-negative, free every string/symbol buffer pair on the bufs list, * and return the result of dt_set_errno(dtp, EDT_COMPILER). */ /*PRINTFLIKE5*/ static int dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs, const char *format, ...) 
{
	va_list ap;
	dt_link_pair_t *pair;

	va_start(ap, format);
	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
	va_end(ap);

	/*
	 * Tear down whatever the caller had acquired so far: the ELF handle,
	 * the file descriptor, and every pending string/symbol buffer pair.
	 */
	if (elf != NULL)
		(void) elf_end(elf);

	if (fd >= 0)
		(void) close(fd);

	while ((pair = bufs) != NULL) {
		bufs = pair->dlp_next;
		dt_free(dtp, pair->dlp_str);
		dt_free(dtp, pair->dlp_sym);
		dt_free(dtp, pair);
	}

	return (dt_set_errno(dtp, EDT_COMPILER));
}

/*
 * Post-process a single object file so that its DTrace probe sites can be
 * linked.  The file named by obj is opened read/write, checked to be a plain
 * ELF file of the class and machine type implied by dtp->dt_oflags, and then
 * every SHT_REL/SHT_RELA section is scanned for relocations against symbols
 * whose names begin with "__dtrace".  Each match is registered through
 * dt_probe_define(), its call site is rewritten by dt_modtext(), and the
 * referenced symbol is marked SHN_SUNW_IGNORE.
 *
 * *eprobesp is set to 1 when at least one is-enabled probe site is seen; it
 * is never cleared here.
 *
 * Returns 0 on success; on failure the result of dt_link_error() is returned.
 */
static int
process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
{
	static const char dt_prefix[] = "__dtrace";
	static const char dt_enabled[] = "enabled";
	static const char dt_symprefix[] = "$dtrace";
	static const char dt_symfmt[] = "%s%ld.%s";
	char probename[DTRACE_NAMELEN];
	int fd, i, ndx, eprobe, mod = 0;
	Elf *elf = NULL;
	GElf_Ehdr ehdr;
	Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt;
	Elf_Data *data_rel, *data_sym, *data_str, *data_tgt;
	GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt;
	GElf_Sym rsym, fsym, dsym;
	GElf_Rela rela;
	char *s, *p, *r;
	char pname[DTRACE_PROVNAMELEN];
	dt_provider_t *pvp;
	dt_probe_t *prp;
	uint32_t off, eclass, emachine1, emachine2;
	size_t symsize, nsym, isym, istr, len;
	key_t objkey;
	dt_link_pair_t *pair, *bufs = NULL;
	dt_strtab_t *strtab;

	if ((fd = open64(obj, O_RDWR)) == -1) {
		return (dt_link_error(dtp, elf, fd, bufs,
		    "failed to open %s: %s", obj, strerror(errno)));
	}

	if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) {
		return (dt_link_error(dtp, elf, fd, bufs,
		    "failed to process %s: %s", obj, elf_errmsg(elf_errno())));
	}

	switch (elf_kind(elf)) {
	case ELF_K_ELF:
		break;
	case ELF_K_AR:
		return (dt_link_error(dtp, elf, fd, bufs, "archives are not "
		    "permitted; use the contents of the archive instead: %s",
		    obj));
	default:
		return (dt_link_error(dtp, elf, fd, bufs,
		    "invalid file type: %s", obj));
	}

	if (gelf_getehdr(elf, &ehdr) == NULL) {
		return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s",
		    obj));
	}

	/*
	 * Pick the ELF class, the acceptable machine type(s) and the symbol
	 * entry size that match the data model selected in dtp->dt_oflags.
	 */
	if (dtp->dt_oflags & DTRACE_O_LP64) {
		eclass = ELFCLASS64;
#if defined(__mips__)
		emachine1 = emachine2 = EM_MIPS;
#elif defined(__powerpc__)
		emachine1 = emachine2 = EM_PPC64;
#elif defined(__sparc)
		emachine1 = emachine2 = EM_SPARCV9;
#elif defined(__i386) || defined(__amd64)
		emachine1 = emachine2 = EM_AMD64;
#endif
		symsize = sizeof (Elf64_Sym);
	} else {
		eclass = ELFCLASS32;
#if defined(__arm__)
		emachine1 = emachine2 = EM_ARM;
#elif defined(__mips__)
		emachine1 = emachine2 = EM_MIPS;
#elif defined(__powerpc__)
		emachine1 = emachine2 = EM_PPC;
#elif defined(__sparc)
		emachine1 = EM_SPARC;
		emachine2 = EM_SPARC32PLUS;
#elif defined(__i386) || defined(__amd64)
		emachine1 = emachine2 = EM_386;
#endif
		symsize = sizeof (Elf32_Sym);
	}

	if (ehdr.e_ident[EI_CLASS] != eclass) {
		return (dt_link_error(dtp, elf, fd, bufs,
		    "incorrect ELF class for object file: %s", obj));
	}

	if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) {
		return (dt_link_error(dtp, elf, fd, bufs,
		    "incorrect ELF machine type for object file: %s", obj));
	}

	/*
	 * We use this token as a relatively unique handle for this file on the
	 * system in order to disambiguate potential conflicts between files of
	 * the same name which contain identically named local symbols.
	 */
	if ((objkey = ftok(obj, 0)) == (key_t)-1) {
		return (dt_link_error(dtp, elf, fd, bufs,
		    "failed to generate unique key for object file: %s", obj));
	}

	scn_rel = NULL;
	while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) {
		if (gelf_getshdr(scn_rel, &shdr_rel) == NULL)
			goto err;

		/*
		 * Skip any non-relocation sections.
		 */
		if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL)
			continue;

		if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL)
			goto err;

		/*
		 * Grab the section, section header and section data for the
		 * symbol table that this relocation section references.
		 */
		if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL ||
		    gelf_getshdr(scn_sym, &shdr_sym) == NULL ||
		    (data_sym = elf_getdata(scn_sym, NULL)) == NULL)
			goto err;

		/*
		 * Ditto for that symbol table's string table.
		 */
		if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL ||
		    gelf_getshdr(scn_str, &shdr_str) == NULL ||
		    (data_str = elf_getdata(scn_str, NULL)) == NULL)
			goto err;

		/*
		 * Grab the section, section header and section data for the
		 * target section for the relocations. For the relocations
		 * we're looking for -- this will typically be the text of the
		 * object file.
		 */
		if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL ||
		    gelf_getshdr(scn_tgt, &shdr_tgt) == NULL ||
		    (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL)
			goto err;

		/*
		 * We're looking for relocations to symbols matching this form:
		 *
		 *   __dtrace[enabled]_<prov>___<probe>
		 *
		 * For the generated object, we need to record the location
		 * identified by the relocation, and create a new relocation
		 * in the generated object that will be resolved at link time
		 * to the location of the function in which the probe is
		 * embedded. In the target object, we change the matched symbol
		 * so that it will be ignored at link time, and we modify the
		 * target (text) section to replace the call instruction with
		 * one or more nops.
		 *
		 * If the function containing the probe is locally scoped
		 * (static), we create an alias used by the relocation in the
		 * generated object. The alias, a new symbol, will be global
		 * (so that the relocation from the generated object can be
		 * resolved), and hidden (so that it is converted to a local
		 * symbol at link time). Such aliases have this form:
		 *
		 *   $dtrace<key>.<function>
		 *
		 * We take a first pass through all the relocations to
		 * populate our string table and count the number of extra
		 * symbols we'll require.
		 */
		strtab = dt_strtab_create(1);
		nsym = 0;
		isym = data_sym->d_size / symsize;
		istr = data_str->d_size;

		/*
		 * NOTE(review): sh_entsize is used as a divisor here (and in
		 * the second pass) without a check for zero.
		 */
		for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {

			/* Normalize REL entries to RELA with a zero addend. */
			if (shdr_rel.sh_type == SHT_RELA) {
				if (gelf_getrela(data_rel, i, &rela) == NULL)
					continue;
			} else {
				GElf_Rel rel;
				if (gelf_getrel(data_rel, i, &rel) == NULL)
					continue;
				rela.r_offset = rel.r_offset;
				rela.r_info = rel.r_info;
				rela.r_addend = 0;
			}

			if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info),
			    &rsym) == NULL) {
				dt_strtab_destroy(strtab);
				goto err;
			}

			/*
			 * NOTE(review): unlike the second pass below,
			 * rsym.st_name is not range-checked against
			 * data_str->d_size before it is used here.
			 */
			s = (char *)data_str->d_buf + rsym.st_name;

			if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
				continue;

			if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
			    shdr_rel.sh_info, &fsym,
			    (emachine1 == EM_PPC64), elf) != 0) {
				dt_strtab_destroy(strtab);
				goto err;
			}

			/* Only locally scoped functions need an alias. */
			if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL)
				continue;

			if (fsym.st_name > data_str->d_size) {
				dt_strtab_destroy(strtab);
				goto err;
			}

			s = (char *)data_str->d_buf + fsym.st_name;

			/*
			 * If this symbol isn't of type function, we've really
			 * driven off the rails or the object file is corrupt.
			 */
			if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) {
				dt_strtab_destroy(strtab);
				return (dt_link_error(dtp, elf, fd, bufs,
				    "expected %s to be of type function", s));
			}

			/*
			 * Build the alias name in a scratch buffer; it is
			 * counted once per distinct name via the string table.
			 */
			len = snprintf(NULL, 0, dt_symfmt, dt_symprefix,
			    objkey, s) + 1;
			if ((p = dt_alloc(dtp, len)) == NULL) {
				dt_strtab_destroy(strtab);
				goto err;
			}
			(void) snprintf(p, len, dt_symfmt, dt_symprefix,
			    objkey, s);

			if (dt_strtab_index(strtab, p) == -1) {
				nsym++;
				(void) dt_strtab_insert(strtab, p);
			}

			dt_free(dtp, p);
		}

		/*
		 * If needed, allocate the additional space for the symbol
		 * table and string table copying the old data into the new
		 * buffers, and marking the buffers as dirty. We inject those
		 * newly allocated buffers into the libelf data structures, but
		 * are still responsible for freeing them once we're done with
		 * the elf handle.
		 */
		if (nsym > 0) {
			/*
			 * The first byte of the string table is reserved for
			 * the \0 entry.
			 */
			len = dt_strtab_size(strtab) - 1;

			assert(len > 0);
			assert(dt_strtab_index(strtab, "") == 0);

			dt_strtab_destroy(strtab);

			if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL)
				goto err;

			if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size +
			    len)) == NULL) {
				dt_free(dtp, pair);
				goto err;
			}

			if ((pair->dlp_sym = dt_alloc(dtp, data_sym->d_size +
			    nsym * symsize)) == NULL) {
				dt_free(dtp, pair->dlp_str);
				dt_free(dtp, pair);
				goto err;
			}

			/* Queue the pair so that it is freed on every exit. */
			pair->dlp_next = bufs;
			bufs = pair;

			bcopy(data_str->d_buf, pair->dlp_str, data_str->d_size);
			data_str->d_buf = pair->dlp_str;
			data_str->d_size += len;
			(void) elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY);

			shdr_str.sh_size += len;
			(void) gelf_update_shdr(scn_str, &shdr_str);

			bcopy(data_sym->d_buf, pair->dlp_sym, data_sym->d_size);
			data_sym->d_buf = pair->dlp_sym;
			data_sym->d_size += nsym * symsize;
			(void) elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY);

			shdr_sym.sh_size += nsym * symsize;
			(void) gelf_update_shdr(scn_sym, &shdr_sym);

			/* nsym now holds the total symbol count. */
			nsym += isym;
		} else {
			dt_strtab_destroy(strtab);
		}

		/*
		 * Now that the tables have been allocated, perform the
		 * modifications described above.
		 */
		for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {

			if (shdr_rel.sh_type == SHT_RELA) {
				if (gelf_getrela(data_rel, i, &rela) == NULL)
					continue;
			} else {
				GElf_Rel rel;
				if (gelf_getrel(data_rel, i, &rel) == NULL)
					continue;
				rela.r_offset = rel.r_offset;
				rela.r_info = rel.r_info;
				rela.r_addend = 0;
			}

			ndx = GELF_R_SYM(rela.r_info);

			if (gelf_getsym(data_sym, ndx, &rsym) == NULL ||
			    rsym.st_name > data_str->d_size)
				goto err;

			s = (char *)data_str->d_buf + rsym.st_name;

			if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
				continue;

			s += sizeof (dt_prefix) - 1;

			/*
			 * Check to see if this is an 'is-enabled' check as
			 * opposed to a normal probe.
			 */
			if (strncmp(s, dt_enabled,
			    sizeof (dt_enabled) - 1) == 0) {
				s += sizeof (dt_enabled) - 1;
				eprobe = 1;
				*eprobesp = 1;
				dt_dprintf("is-enabled probe\n");
			} else {
				eprobe = 0;
				dt_dprintf("normal probe\n");
			}

			if (*s++ != '_')
				goto err;

			/*
			 * The provider name runs up to the "___" separator;
			 * the probe name follows it.
			 */
			if ((p = strstr(s, "___")) == NULL ||
			    p - s >= sizeof (pname))
				goto err;

			bcopy(s, pname, p - s);
			pname[p - s] = '\0';

			if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
			    shdr_rel.sh_info, &fsym,
			    (emachine1 == EM_PPC64), elf) != 0)
				goto err;

			if (fsym.st_name > data_str->d_size)
				goto err;

			assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);

			/*
			 * If a NULL relocation name is passed to
			 * dt_probe_define(), the function name is used for the
			 * relocation. The relocation needs to use a mangled
			 * name if the symbol is locally scoped; the function
			 * name may need to change if we've found the global
			 * alias for the locally scoped symbol (we prefer
			 * global symbols to locals in dt_symtab_lookup()).
			 */
			s = (char *)data_str->d_buf + fsym.st_name;
			r = NULL;

			if (GELF_ST_BIND(fsym.st_info) == STB_LOCAL) {
				/*
				 * Append the alias symbol and its name to the
				 * space reserved after the first pass.
				 */
				dsym = fsym;
				dsym.st_name = istr;
				dsym.st_info = GELF_ST_INFO(STB_GLOBAL,
				    STT_FUNC);
				dsym.st_other =
				    ELF64_ST_VISIBILITY(STV_ELIMINATE);
				(void) gelf_update_sym(data_sym, isym, &dsym);

				r = (char *)data_str->d_buf + istr;
				istr += 1 + sprintf(r, dt_symfmt,
				    dt_symprefix, objkey, s);
				isym++;
				assert(isym <= nsym);

			} else if (strncmp(s, dt_symprefix,
			    strlen(dt_symprefix)) == 0) {
				/*
				 * The symbol found is already an alias: use it
				 * for the relocation and recover the function
				 * name that follows the '.'.
				 */
				r = s;
				if ((s = strchr(s, '.')) == NULL)
					goto err;
				s++;
			}

			if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) {
				return (dt_link_error(dtp, elf, fd, bufs,
				    "no such provider %s", pname));
			}

			if (strlcpy(probename, p + 3, sizeof (probename)) >=
			    sizeof (probename))
				return (dt_link_error(dtp, elf, fd, bufs,
				    "invalid probe name %s", probename));
			(void) strhyphenate(probename);
			if ((prp = dt_probe_lookup(pvp, probename)) == NULL)
				return (dt_link_error(dtp, elf, fd, bufs,
				    "no such probe %s", probename));

			assert(fsym.st_value <= rela.r_offset);

			/* Offset of the probe site within its function. */
			off = rela.r_offset - fsym.st_value;
			if (dt_modtext(dtp, data_tgt->d_buf, eprobe,
			    &rela, &off) != 0)
				goto err;

			if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) {
				return (dt_link_error(dtp, elf, fd, bufs,
				    "failed to allocate space for probe"));
			}
#ifndef illumos
			/*
			 * Our linker doesn't understand the SUNW_IGNORE ndx and
			 * will try to use this relocation when we build the
			 * final executable. Since we are done processing this
			 * relocation, mark it as nonexistent and let libelf
			 * remove it from the file.
			 * If this wasn't done, we would have garbage added to
			 * the executable file as the symbol is going to be
			 * changed from UND to ABS.
			 */
			if (shdr_rel.sh_type == SHT_RELA) {
				rela.r_offset = 0;
				rela.r_info = 0;
				rela.r_addend = 0;
				(void) gelf_update_rela(data_rel, i, &rela);
			} else {
				GElf_Rel rel;
				rel.r_offset = 0;
				rel.r_info = 0;
				(void) gelf_update_rel(data_rel, i, &rel);
			}
#endif

			mod = 1;
			(void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY);

			/*
			 * This symbol may already have been marked to
			 * be ignored by another relocation referencing
			 * the same symbol or if this object file has
			 * already been processed by an earlier link
			 * invocation.
*/ #ifndef illumos #define SHN_SUNW_IGNORE SHN_ABS #endif if (rsym.st_shndx != SHN_SUNW_IGNORE) { rsym.st_shndx = SHN_SUNW_IGNORE; (void) gelf_update_sym(data_sym, ndx, &rsym); } } } if (mod && elf_update(elf, ELF_C_WRITE) == -1) goto err; (void) elf_end(elf); (void) close(fd); #ifndef illumos if (nsym > 0) #endif while ((pair = bufs) != NULL) { bufs = pair->dlp_next; dt_free(dtp, pair->dlp_str); dt_free(dtp, pair->dlp_sym); dt_free(dtp, pair); } return (0); err: return (dt_link_error(dtp, elf, fd, bufs, "an error was encountered while processing %s", obj)); } int dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags, const char *file, int objc, char *const objv[]) { #ifndef illumos char tfile[PATH_MAX]; #endif char drti[PATH_MAX]; dof_hdr_t *dof; int fd, status, i, cur; char *cmd, tmp; size_t len; int eprobes = 0, ret = 0; #ifndef illumos if (access(file, R_OK) == 0) { fprintf(stderr, "dtrace: target object (%s) already exists. " "Please remove the target\ndtrace: object and rebuild all " "the source objects if you wish to run the DTrace\n" "dtrace: linking process again\n", file); /* * Several build infrastructures run DTrace twice (e.g. * postgres) and we don't want the build to fail. Return * 0 here since this isn't really a fatal error. */ return (0); } #endif /* * A NULL program indicates a special use in which we just link * together a bunch of object files specified in objv and then * unlink(2) those object files. 
*/ if (pgp == NULL) { const char *fmt = "%s -o %s -r"; len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1; for (i = 0; i < objc; i++) len += strlen(objv[i]) + 1; cmd = alloca(len); cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file); for (i = 0; i < objc; i++) cur += snprintf(cmd + cur, len - cur, " %s", objv[i]); if ((status = system(cmd)) == -1) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to run %s: %s", dtp->dt_ld_path, strerror(errno))); } if (WIFSIGNALED(status)) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_ld_path, WTERMSIG(status))); } if (WEXITSTATUS(status) != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_ld_path, WEXITSTATUS(status))); } for (i = 0; i < objc; i++) { if (strcmp(objv[i], file) != 0) (void) unlink(objv[i]); } return (0); } for (i = 0; i < objc; i++) { if (process_obj(dtp, objv[i], &eprobes) != 0) return (-1); /* errno is set for us */ } /* * If there are is-enabled probes then we need to force use of DOF * version 2. */ if (eprobes && pgp->dp_dofversion < DOF_VERSION_2) pgp->dp_dofversion = DOF_VERSION_2; if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL) return (-1); /* errno is set for us */ #ifdef illumos /* * Create a temporary file and then unlink it if we're going to * combine it with drti.o later. We can still refer to it in child * processes as /dev/fd/. */ if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to open %s: %s", file, strerror(errno))); } #else snprintf(tfile, sizeof(tfile), "%s.XXXXXX", file); if ((fd = mkostemp(tfile, O_CLOEXEC)) == -1) return (dt_link_error(dtp, NULL, -1, NULL, "failed to create temporary file %s: %s", tfile, strerror(errno))); #endif /* * If -xlinktype=DOF has been selected, just write out the DOF. * Otherwise proceed to the default of generating and linking ELF. 
*/ switch (dtp->dt_linktype) { case DT_LTYP_DOF: if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz) ret = errno; if (close(fd) != 0 && ret == 0) ret = errno; if (ret != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", file, strerror(ret))); } return (0); case DT_LTYP_ELF: break; /* fall through to the rest of dtrace_program_link() */ default: return (dt_link_error(dtp, NULL, -1, NULL, "invalid link type %u\n", dtp->dt_linktype)); } #ifdef illumos if (!dtp->dt_lazyload) (void) unlink(file); #endif if (dtp->dt_oflags & DTRACE_O_LP64) status = dump_elf64(dtp, dof, fd); else status = dump_elf32(dtp, dof, fd); #ifdef illumos if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", file, strerror(errno))); } #else if (status != 0) return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", tfile, strerror(dtrace_errno(dtp)))); #endif if (!dtp->dt_lazyload) { #ifdef illumos const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s"; if (dtp->dt_oflags & DTRACE_O_LP64) { (void) snprintf(drti, sizeof (drti), "%s/64/drti.o", _dtrace_libdir); } else { (void) snprintf(drti, sizeof (drti), "%s/drti.o", _dtrace_libdir); } len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd, drti) + 1; cmd = alloca(len); (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti); #else const char *fmt = "%s -o %s -r %s %s"; dt_dirpath_t *dp = dt_list_next(&dtp->dt_lib_path); (void) snprintf(drti, sizeof (drti), "%s/drti.o", dp->dir_path); len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, tfile, drti) + 1; cmd = alloca(len); (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, tfile, drti); #endif if ((status = system(cmd)) == -1) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to run %s: %s", dtp->dt_ld_path, strerror(errno)); goto done; } if (WIFSIGNALED(status)) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to link %s: %s failed due to signal %d", file, 
dtp->dt_ld_path, WTERMSIG(status)); goto done; } if (WEXITSTATUS(status) != 0) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_ld_path, WEXITSTATUS(status)); goto done; } (void) close(fd); /* release temporary file */ #ifdef __FreeBSD__ /* * Now that we've linked drti.o, reduce the global __SUNW_dof * symbol to a local symbol. This is needed to so that multiple * generated object files (for different providers, for * instance) can be linked together. This is accomplished using * the -Blocal flag with Sun's linker, but GNU ld doesn't appear * to have an equivalent option. */ asprintf(&cmd, "%s --localize-hidden %s", dtp->dt_objcopy_path, file); if ((status = system(cmd)) == -1) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to run %s: %s", dtp->dt_objcopy_path, strerror(errno)); free(cmd); goto done; } free(cmd); if (WIFSIGNALED(status)) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_objcopy_path, WTERMSIG(status)); goto done; } if (WEXITSTATUS(status) != 0) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_objcopy_path, WEXITSTATUS(status)); goto done; } #endif } else { #ifdef __FreeBSD__ if (rename(tfile, file) != 0) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to rename %s to %s: %s", tfile, file, strerror(errno)); goto done; } #endif (void) close(fd); } done: dtrace_dof_destroy(dtp, dof); #ifdef __FreeBSD__ if (!dtp->dt_lazyload) (void) unlink(tfile); #endif return (ret); } Index: stable/11/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c =================================================================== --- stable/11/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c (revision 326301) +++ stable/11/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c (revision 326302) @@ -1,1209 +1,1210 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms 
of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * DTrace Process Control * * This file provides a set of routines that permit libdtrace and its clients * to create and grab process handles using libproc, and to share these handles * between library mechanisms that need libproc access, such as ustack(), and * client mechanisms that need libproc access, such as dtrace(1M) -c and -p. * The library provides several mechanisms in the libproc control layer: * * Reference Counting: The library code and client code can independently grab * the same process handles without interfering with one another. Only when * the reference count drops to zero and the handle is not being cached (see * below for more information on caching) will Prelease() be called on it. * * Handle Caching: If a handle is grabbed PGRAB_RDONLY (e.g. by ustack()) and * the reference count drops to zero, the handle is not immediately released. * Instead, libproc handles are maintained on dph_lrulist in order from most- * recently accessed to least-recently accessed. 
Idle handles are maintained * until a pre-defined LRU cache limit is exceeded, permitting repeated calls * to ustack() to avoid the overhead of releasing and re-grabbing processes. * * Process Control: For processes that are grabbed for control (~PGRAB_RDONLY) * or created by dt_proc_create(), a control thread is created to provide * callbacks on process exit and symbol table caching on dlopen()s. * * MT-Safety: Libproc is not MT-Safe, so dt_proc_lock() and dt_proc_unlock() * are provided to synchronize access to the libproc handle between libdtrace * code and client code and the control thread's use of the ps_prochandle. * * NOTE: MT-Safety is NOT provided for libdtrace itself, or for use of the * dtrace_proc_grab/dtrace_proc_create mechanisms. Like all exported libdtrace * calls, these are assumed to be MT-Unsafe. MT-Safety is ONLY provided for * synchronization between libdtrace control threads and the client thread. * * The ps_prochandles themselves are maintained along with a dt_proc_t struct * in a hash table indexed by PID. This provides basic locking and reference * counting. The dt_proc_t is also maintained in LRU order on dph_lrulist. * The dph_lrucnt and dph_lrulim count the number of cacheable processes and * the current limit on the number of actively cached entries. * * The control thread for a process establishes breakpoints at the rtld_db * locations of interest, updates mappings and symbol tables at these points, * and handles exec and fork (by always following the parent). The control * thread automatically exits when the process dies or control is lost. * * A simple notification mechanism is provided for libdtrace clients using * dtrace_handle_proc() for notification of PS_UNDEAD or PS_LOST events. If * such an event occurs, the dt_proc_t itself is enqueued on a notification * list and the control thread broadcasts to dph_cv. dtrace_sleep() will wake * up using this condition and will then call the client handler as necessary. 
*/ #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #ifndef illumos #include #include #define SYS_forksys SYS_fork #endif #define IS_SYS_EXEC(w) (w == SYS_execve) #define IS_SYS_FORK(w) (w == SYS_vfork || w == SYS_forksys) static dt_bkpt_t * dt_proc_bpcreate(dt_proc_t *dpr, uintptr_t addr, dt_bkpt_f *func, void *data) { struct ps_prochandle *P = dpr->dpr_proc; dt_bkpt_t *dbp; assert(DT_MUTEX_HELD(&dpr->dpr_lock)); if ((dbp = dt_zalloc(dpr->dpr_hdl, sizeof (dt_bkpt_t))) != NULL) { dbp->dbp_func = func; dbp->dbp_data = data; dbp->dbp_addr = addr; if (Psetbkpt(P, dbp->dbp_addr, &dbp->dbp_instr) == 0) dbp->dbp_active = B_TRUE; dt_list_append(&dpr->dpr_bps, dbp); } return (dbp); } static void dt_proc_bpdestroy(dt_proc_t *dpr, int delbkpts) { int state = Pstate(dpr->dpr_proc); dt_bkpt_t *dbp, *nbp; assert(DT_MUTEX_HELD(&dpr->dpr_lock)); for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = nbp) { if (delbkpts && dbp->dbp_active && state != PS_LOST && state != PS_UNDEAD) { (void) Pdelbkpt(dpr->dpr_proc, dbp->dbp_addr, dbp->dbp_instr); } nbp = dt_list_next(dbp); dt_list_delete(&dpr->dpr_bps, dbp); dt_free(dpr->dpr_hdl, dbp); } } static void dt_proc_bpmatch(dtrace_hdl_t *dtp, dt_proc_t *dpr) { #ifdef illumos const lwpstatus_t *psp = &Pstatus(dpr->dpr_proc)->pr_lwp; #else unsigned long pc; #endif dt_bkpt_t *dbp; assert(DT_MUTEX_HELD(&dpr->dpr_lock)); #ifndef illumos proc_regget(dpr->dpr_proc, REG_PC, &pc); proc_bkptregadj(&pc); #endif for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = dt_list_next(dbp)) { #ifdef illumos if (psp->pr_reg[R_PC] == dbp->dbp_addr) break; #else if (pc == dbp->dbp_addr) break; #endif } if (dbp == NULL) { dt_dprintf("pid %d: spurious breakpoint wakeup for %lx\n", #ifdef illumos (int)dpr->dpr_pid, (ulong_t)psp->pr_reg[R_PC]); #else (int)dpr->dpr_pid, pc); #endif return; } dt_dprintf("pid %d: hit breakpoint at %lx (%lu)\n", (int)dpr->dpr_pid, (ulong_t)dbp->dbp_addr, ++dbp->dbp_hits); 
dbp->dbp_func(dtp, dpr, dbp->dbp_data); (void) Pxecbkpt(dpr->dpr_proc, dbp->dbp_instr); } static void dt_proc_bpenable(dt_proc_t *dpr) { dt_bkpt_t *dbp; assert(DT_MUTEX_HELD(&dpr->dpr_lock)); for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = dt_list_next(dbp)) { if (!dbp->dbp_active && Psetbkpt(dpr->dpr_proc, dbp->dbp_addr, &dbp->dbp_instr) == 0) dbp->dbp_active = B_TRUE; } dt_dprintf("breakpoints enabled\n"); } static void dt_proc_bpdisable(dt_proc_t *dpr) { dt_bkpt_t *dbp; assert(DT_MUTEX_HELD(&dpr->dpr_lock)); for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = dt_list_next(dbp)) { if (dbp->dbp_active && Pdelbkpt(dpr->dpr_proc, dbp->dbp_addr, dbp->dbp_instr) == 0) dbp->dbp_active = B_FALSE; } dt_dprintf("breakpoints disabled\n"); } static void dt_proc_notify(dtrace_hdl_t *dtp, dt_proc_hash_t *dph, dt_proc_t *dpr, const char *msg) { dt_proc_notify_t *dprn = dt_alloc(dtp, sizeof (dt_proc_notify_t)); if (dprn == NULL) { dt_dprintf("failed to allocate notification for %d %s\n", (int)dpr->dpr_pid, msg); } else { dprn->dprn_dpr = dpr; if (msg == NULL) dprn->dprn_errmsg[0] = '\0'; else (void) strlcpy(dprn->dprn_errmsg, msg, sizeof (dprn->dprn_errmsg)); (void) pthread_mutex_lock(&dph->dph_lock); dprn->dprn_next = dph->dph_notify; dph->dph_notify = dprn; (void) pthread_cond_broadcast(&dph->dph_cv); (void) pthread_mutex_unlock(&dph->dph_lock); } } /* * Check to see if the control thread was requested to stop when the victim * process reached a particular event (why) rather than continuing the victim. * If 'why' is set in the stop mask, we wait on dpr_cv for dt_proc_continue(). * If 'why' is not set, this function returns immediately and does nothing. 
*/ static void dt_proc_stop(dt_proc_t *dpr, uint8_t why) { assert(DT_MUTEX_HELD(&dpr->dpr_lock)); assert(why != DT_PROC_STOP_IDLE); if (dpr->dpr_stop & why) { dpr->dpr_stop |= DT_PROC_STOP_IDLE; dpr->dpr_stop &= ~why; (void) pthread_cond_broadcast(&dpr->dpr_cv); /* * We disable breakpoints while stopped to preserve the * integrity of the program text for both our own disassembly * and that of the kernel. */ dt_proc_bpdisable(dpr); while (dpr->dpr_stop & DT_PROC_STOP_IDLE) (void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock); dt_proc_bpenable(dpr); } } /*ARGSUSED*/ static void dt_proc_bpmain(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *fname) { dt_dprintf("pid %d: breakpoint at %s()\n", (int)dpr->dpr_pid, fname); dt_proc_stop(dpr, DT_PROC_STOP_MAIN); } static void dt_proc_rdevent(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *evname) { rd_event_msg_t rdm; rd_err_e err; if ((err = rd_event_getmsg(dpr->dpr_rtld, &rdm)) != RD_OK) { dt_dprintf("pid %d: failed to get %s event message: %s\n", (int)dpr->dpr_pid, evname, rd_errstr(err)); return; } dt_dprintf("pid %d: rtld event %s type=%d state %d\n", (int)dpr->dpr_pid, evname, rdm.type, rdm.u.state); switch (rdm.type) { case RD_DLACTIVITY: if (rdm.u.state != RD_CONSISTENT) break; Pupdate_syms(dpr->dpr_proc); if (dt_pid_create_probes_module(dtp, dpr) != 0) dt_proc_notify(dtp, dtp->dt_procs, dpr, dpr->dpr_errmsg); break; case RD_PREINIT: Pupdate_syms(dpr->dpr_proc); dt_proc_stop(dpr, DT_PROC_STOP_PREINIT); break; case RD_POSTINIT: Pupdate_syms(dpr->dpr_proc); dt_proc_stop(dpr, DT_PROC_STOP_POSTINIT); break; } } static void dt_proc_rdwatch(dt_proc_t *dpr, rd_event_e event, const char *evname) { rd_notify_t rdn; rd_err_e err; if ((err = rd_event_addr(dpr->dpr_rtld, event, &rdn)) != RD_OK) { dt_dprintf("pid %d: failed to get event address for %s: %s\n", (int)dpr->dpr_pid, evname, rd_errstr(err)); return; } if (rdn.type != RD_NOTIFY_BPT) { dt_dprintf("pid %d: event %s has unexpected type %d\n", (int)dpr->dpr_pid, evname, 
		    rdn.type);
		return;
	}

	/* Have dt_proc_rdevent() called with evname when the event fires. */
	(void) dt_proc_bpcreate(dpr, rdn.u.bptaddr,
#ifdef illumos
	    (dt_bkpt_f *)dt_proc_rdevent, (void *)evname);
#else
	    /* XXX ugly */
	    (dt_bkpt_f *)dt_proc_rdevent, __DECONST(void *, evname));
#endif
}

/*
 * Common code for enabling events associated with the run-time linker after
 * attaching to a process or after a victim process completes an exec(2).
 *
 * When exec is non-zero the existing breakpoint records are discarded first,
 * without touching the victim (delbkpts is B_FALSE).  A breakpoint on
 * a.out`main is then created if that symbol can be found.  The caller must
 * hold dpr_lock.
 */
static void
dt_proc_attach(dt_proc_t *dpr, int exec)
{
#ifdef illumos
	const pstatus_t *psp = Pstatus(dpr->dpr_proc);
#endif
	rd_err_e err;
	GElf_Sym sym;

	assert(DT_MUTEX_HELD(&dpr->dpr_lock));

	if (exec) {
#ifdef illumos
		if (psp->pr_lwp.pr_errno != 0)
			return; /* exec failed: nothing needs to be done */
#endif
		dt_proc_bpdestroy(dpr, B_FALSE);
#ifdef illumos
		Preset_maps(dpr->dpr_proc);
#endif
	}

	/*
	 * err is only assigned when Prd_agent() returned non-NULL, which is
	 * why the failure message below checks dpr_rtld before reading it.
	 */
	if ((dpr->dpr_rtld = Prd_agent(dpr->dpr_proc)) != NULL &&
	    (err = rd_event_enable(dpr->dpr_rtld, B_TRUE)) == RD_OK) {
#ifdef illumos
		dt_proc_rdwatch(dpr, RD_PREINIT, "RD_PREINIT");
#endif
		dt_proc_rdwatch(dpr, RD_POSTINIT, "RD_POSTINIT");
#ifdef illumos
		dt_proc_rdwatch(dpr, RD_DLACTIVITY, "RD_DLACTIVITY");
#endif
	} else {
		dt_dprintf("pid %d: failed to enable rtld events: %s\n",
		    (int)dpr->dpr_pid, dpr->dpr_rtld ? rd_errstr(err) :
		    "rtld_db agent initialization failed");
	}

	Pupdate_maps(dpr->dpr_proc);

	if (Pxlookup_by_name(dpr->dpr_proc, LM_ID_BASE,
	    "a.out", "main", &sym, NULL) == 0) {
		(void) dt_proc_bpcreate(dpr, (uintptr_t)sym.st_value,
		    (dt_bkpt_f *)dt_proc_bpmain, "a.out`main");
	} else {
		dt_dprintf("pid %d: failed to find a.out`main: %s\n",
		    (int)dpr->dpr_pid, strerror(errno));
	}
}

/*
 * Wait for a stopped process to be set running again by some other debugger.
 * This is typically not required by /proc-based debuggers, since the usual
 * model is that one debugger controls one victim.  But DTrace, as usual, has
 * its own needs: the stop() action assumes that prun(1) or some other tool
 * will be applied to resume the victim process.
 * This could be solved by
 * adding a PCWRUN directive to /proc, but that seems like overkill unless
 * other debuggers end up needing this functionality, so we implement a cheap
 * equivalent to PCWRUN using the set of existing kernel mechanisms.
 *
 * Our intent is really not just to wait for the victim to run, but rather to
 * wait for it to run and then stop again for a reason other than the current
 * PR_REQUESTED stop.  Since PCWSTOP/Pstopstatus() can be applied repeatedly
 * to a stopped process and will return the same result without affecting the
 * victim, we can just perform these operations repeatedly until Pstate()
 * changes, the representative LWP ID changes, or the stop timestamp advances.
 * dt_proc_control() will then rediscover the new state and continue as usual.
 * When the process is still stopped in the same exact state, we sleep for a
 * brief interval before waiting again so as not to spin consuming CPU cycles.
 *
 * Everything after the initial printf() is compiled only when DOODAD is
 * defined; without it this function just reports that it is not implemented.
 */
static void
dt_proc_waitrun(dt_proc_t *dpr)
{
-printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
+	printf("%s:%s(%d): not implemented\n", __FUNCTION__, __FILE__,
+	    __LINE__);
#ifdef DOODAD
	struct ps_prochandle *P = dpr->dpr_proc;
	const lwpstatus_t *psp = &Pstatus(P)->pr_lwp;

	int krflag = psp->pr_flags & (PR_KLC | PR_RLC);
	timestruc_t tstamp = psp->pr_tstamp;
	lwpid_t lwpid = psp->pr_lwpid;

	const long wstop = PCWSTOP;
	int pfd = Pctlfd(P);

	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
	assert(psp->pr_flags & PR_STOPPED);
	assert(Pstate(P) == PS_STOP);

	/*
	 * While we are waiting for the victim to run, clear PR_KLC and PR_RLC
	 * so that if the libdtrace client is killed, the victim stays stopped.
	 * dt_proc_destroy() will also observe this and perform PRELEASE_HANG.
	 */
	(void) Punsetflags(P, krflag);
	Psync(P);

	/* The wait itself happens with dpr_lock dropped. */
	(void) pthread_mutex_unlock(&dpr->dpr_lock);

	while (!dpr->dpr_quit) {
		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
			continue; /* check dpr_quit and continue waiting */

		(void) pthread_mutex_lock(&dpr->dpr_lock);
		(void) Pstopstatus(P, PCNULL, 0);
		psp = &Pstatus(P)->pr_lwp;

		/*
		 * If we've reached a new state, found a new representative, or
		 * the stop timestamp has changed, restore PR_KLC/PR_RLC to its
		 * original setting and then return with dpr_lock held.
		 */
		if (Pstate(P) != PS_STOP || psp->pr_lwpid != lwpid ||
		    bcmp(&psp->pr_tstamp, &tstamp, sizeof (tstamp)) != 0) {
			(void) Psetflags(P, krflag);
			Psync(P);
			return;
		}

		(void) pthread_mutex_unlock(&dpr->dpr_lock);
		(void) poll(NULL, 0, MILLISEC / 2);
	}

	/* Asked to quit: re-take dpr_lock so we return with it held. */
	(void) pthread_mutex_lock(&dpr->dpr_lock);
#endif
}

/* Argument bundle handed to dt_proc_control() through its void * parameter. */
typedef struct dt_proc_control_data {
	dtrace_hdl_t *dpcd_hdl;			/* DTrace handle */
	dt_proc_t *dpcd_proc;			/* process to control */
} dt_proc_control_data_t;

/*
 * Main loop for all victim process control threads.  We initialize all the
 * appropriate /proc control mechanisms, and then enter a loop waiting for
 * the process to stop on an event or die.  We process any events by calling
 * appropriate subroutines, and exit when the victim dies or we lose control.
 *
 * The control thread synchronizes the use of dpr_proc with other libdtrace
 * threads using dpr_lock.  We hold the lock for all of our operations except
 * waiting while the process is running: this is accomplished by writing a
 * PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.  If the
 * libdtrace client wishes to exit or abort our wait, SIGCANCEL can be used.
*/
static void *
dt_proc_control(void *arg)
{
	/*
	 * arg is the dt_proc_control_data_t that dt_proc_create_thread()
	 * passes to pthread_create(); read the handle and process from it.
	 */
	dt_proc_control_data_t *datap = arg;
	dtrace_hdl_t *dtp = datap->dpcd_hdl;
	dt_proc_t *dpr = datap->dpcd_proc;
	dt_proc_hash_t *dph = dpr->dpr_hdl->dt_procs;
	struct ps_prochandle *P = dpr->dpr_proc;
	int pid = dpr->dpr_pid;

#ifdef illumos
	int pfd = Pctlfd(P);

	const long wstop = PCWSTOP;
#endif
	int notify = B_FALSE;	/* set when the victim is lost or has died */

	/*
	 * We disable the POSIX thread cancellation mechanism so that the
	 * client program using libdtrace can't accidentally cancel our thread.
	 * dt_proc_destroy() uses SIGCANCEL explicitly to simply poke us out
	 * of PCWSTOP with EINTR, at which point we will see dpr_quit and exit.
	 */
	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);

	/*
	 * Set up the corresponding process for tracing by libdtrace. We want
	 * to be able to catch breakpoints and efficiently single-step over
	 * them, and we need to enable librtld_db to watch libdl activity.
	 */
	(void) pthread_mutex_lock(&dpr->dpr_lock);

#ifdef illumos
	(void) Punsetflags(P, PR_ASYNC);	/* require synchronous mode */
	(void) Psetflags(P, PR_BPTADJ);		/* always adjust eip on x86 */
	(void) Punsetflags(P, PR_FORK);		/* do not inherit on fork */

	(void) Pfault(P, FLTBPT, B_TRUE);	/* always trace breakpoints */
	(void) Pfault(P, FLTTRACE, B_TRUE);	/* always trace single-step */

	/*
	 * We must trace exit from exec() system calls so that if the exec is
	 * successful, we can reset our breakpoints and re-initialize libproc.
	 */
	(void) Psysexit(P, SYS_execve, B_TRUE);

	/*
	 * We must trace entry and exit for fork() system calls in order to
	 * disable our breakpoints temporarily during the fork. We do not set
	 * the PR_FORK flag, so if fork succeeds the child begins executing and
	 * does not inherit any other tracing behaviors or a control thread.
	 */
	(void) Psysentry(P, SYS_vfork, B_TRUE);
	(void) Psysexit(P, SYS_vfork, B_TRUE);
	(void) Psysentry(P, SYS_forksys, B_TRUE);
	(void) Psysexit(P, SYS_forksys, B_TRUE);

	Psync(P);				/* enable all /proc changes */
#endif
	dt_proc_attach(dpr, B_FALSE);		/* enable rtld breakpoints */

	/*
	 * If PR_KLC is set, we created the process; otherwise we grabbed it.
	 * Check for an appropriate stop request and wait for dt_proc_continue.
	 */
#ifdef illumos
	if (Pstatus(P)->pr_flags & PR_KLC)
#else
	if (proc_getflags(P) & PR_KLC)
#endif
		dt_proc_stop(dpr, DT_PROC_STOP_CREATE);
	else
		dt_proc_stop(dpr, DT_PROC_STOP_GRAB);

	/* A failure to resume the victim is only logged, not treated as fatal. */
	if (Psetrun(P, 0, 0) == -1) {
		dt_dprintf("pid %d: failed to set running: %s\n",
		    (int)dpr->dpr_pid, strerror(errno));
	}

	(void) pthread_mutex_unlock(&dpr->dpr_lock);

	/*
	 * Wait for the process corresponding to this control thread to stop,
	 * process the event, and then set it running again. We want to sleep
	 * with dpr_lock *unheld* so that other parts of libdtrace can use the
	 * ps_prochandle in the meantime (e.g. ustack()). To do this, we write
	 * a PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.
	 * Once the process stops, we wake up, grab dpr_lock, and then call
	 * Pwait() (which will return immediately) and do our processing.
	 */
	while (!dpr->dpr_quit) {
		const lwpstatus_t *psp;

#ifdef illumos
		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
			continue; /* check dpr_quit and continue waiting */
#else
		/* Wait for the process to report status. */
		/*
		 * NOTE(review): errno is tested without looking at the return
		 * value of proc_wstatus(), so an EINTR left over from an
		 * earlier call could be misread as an interruption here --
		 * confirm against proc_wstatus()'s contract.
		 */
		proc_wstatus(P);
		if (errno == EINTR)
			continue; /* check dpr_quit and continue waiting */
#endif

		(void) pthread_mutex_lock(&dpr->dpr_lock);

#ifdef illumos
pwait_locked:
		if (Pstopstatus(P, PCNULL, 0) == -1 && errno == EINTR) {
			(void) pthread_mutex_unlock(&dpr->dpr_lock);
			continue; /* check dpr_quit and continue waiting */
		}
#endif

		switch (Pstate(P)) {
		case PS_STOP:
#ifdef illumos
			psp = &Pstatus(P)->pr_lwp;
#else
			psp = proc_getlwpstatus(P);
#endif
			dt_dprintf("pid %d: proc stopped showing %d/%d\n",
			    pid, psp->pr_why, psp->pr_what);

			/*
			 * If the process stops showing PR_REQUESTED, then the
			 * DTrace stop() action was applied to it or another
			 * debugging utility (e.g. pstop(1)) asked it to stop.
			 * In either case, the user's intention is for the
			 * process to remain stopped until another external
			 * mechanism (e.g. prun(1)) is applied. So instead of
			 * setting the process running ourself, we wait for
			 * someone else to do so. Once that happens, we return
			 * to our normal loop waiting for an event of interest.
			 */
			if (psp->pr_why == PR_REQUESTED) {
				dt_proc_waitrun(dpr);
				(void) pthread_mutex_unlock(&dpr->dpr_lock);
				continue;
			}

			/*
			 * If the process stops showing one of the events that
			 * we are tracing, perform the appropriate response.
			 * Note that we ignore PR_SUSPENDED, PR_CHECKPOINT, and
			 * PR_JOBCONTROL by design: if one of these conditions
			 * occurs, we will fall through to Psetrun() but the
			 * process will remain stopped in the kernel by the
			 * corresponding mechanism (e.g. job control stop).
			 */
			if (psp->pr_why == PR_FAULTED && psp->pr_what == FLTBPT)
				dt_proc_bpmatch(dtp, dpr);
			else if (psp->pr_why == PR_SYSENTRY &&
			    IS_SYS_FORK(psp->pr_what))
				dt_proc_bpdisable(dpr);
			else if (psp->pr_why == PR_SYSEXIT &&
			    IS_SYS_FORK(psp->pr_what))
				dt_proc_bpenable(dpr);
			else if (psp->pr_why == PR_SYSEXIT &&
			    IS_SYS_EXEC(psp->pr_what))
				dt_proc_attach(dpr, B_TRUE);
			break;

		case PS_LOST:
			/*
			 * Only the illumos build tries to reopen the process;
			 * otherwise PS_LOST always ends the control loop.
			 */
#ifdef illumos
			if (Preopen(P) == 0)
				goto pwait_locked;
#endif

			dt_dprintf("pid %d: proc lost: %s\n",
			    pid, strerror(errno));

			dpr->dpr_quit = B_TRUE;
			notify = B_TRUE;
			break;

		case PS_UNDEAD:
			dt_dprintf("pid %d: proc died\n", pid);
			dpr->dpr_quit = B_TRUE;
			notify = B_TRUE;
			break;
		}

		/* Resume the victim after handling the event, unless it died. */
		if (Pstate(P) != PS_UNDEAD && Psetrun(P, 0, 0) == -1) {
			dt_dprintf("pid %d: failed to set running: %s\n",
			    (int)dpr->dpr_pid, strerror(errno));
		}

		(void) pthread_mutex_unlock(&dpr->dpr_lock);
	}

	/*
	 * If the control thread detected PS_UNDEAD or PS_LOST, then enqueue
	 * the dt_proc_t structure on the dt_proc_hash_t notification list.
	 */
	if (notify)
		dt_proc_notify(dtp, dph, dpr, NULL);

	/*
	 * Destroy and remove any remaining breakpoints, set dpr_done and clear
	 * dpr_tid to indicate the control thread has exited, and notify any
	 * waiting thread in dt_proc_destroy() that we have successfully exited.
	 */
	(void) pthread_mutex_lock(&dpr->dpr_lock);

	dt_proc_bpdestroy(dpr, B_TRUE);
	dpr->dpr_done = B_TRUE;
	dpr->dpr_tid = 0;

	(void) pthread_cond_broadcast(&dpr->dpr_cv);
	(void) pthread_mutex_unlock(&dpr->dpr_lock);

	return (NULL);
}

/*
 * Common failure path for process creation and grabbing: record the formatted
 * message on dtp with dt_set_errmsg(), release the libproc handle if one was
 * obtained, free dpr, set the handle's error to EDT_COMPILER and return NULL.
 * The caller must not use dpr after this returns.
 */
/*PRINTFLIKE3*/
static struct ps_prochandle *
dt_proc_error(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
	va_end(ap);

	if (dpr->dpr_proc != NULL)
		Prelease(dpr->dpr_proc, 0);

	dt_free(dtp, dpr);
	(void) dt_set_errno(dtp, EDT_COMPILER);
	return (NULL);
}

/*
 * Find the dt_proc_t whose pid matches that of P by walking the pid hash
 * chain. The entry is asserted to exist and to own P. If remove is non-zero
 * the entry is also unlinked from its hash chain. Returns the dt_proc_t.
 *
 * NOTE(review): the bucket index is computed by masking with
 * (dph_hashlen - 1), which presumably relies on dph_hashlen being a power of
 * two -- confirm against the value of _dtrace_pidbuckets.
 */
dt_proc_t *
dt_proc_lookup(dtrace_hdl_t *dtp, struct ps_prochandle *P, int remove)
{
	dt_proc_hash_t *dph = dtp->dt_procs;
#ifdef illumos
	pid_t pid = Pstatus(P)->pr_pid;
#else
	pid_t pid = proc_getpid(P);
#endif
	/* dpp trails dpr so that the matching entry can be unlinked below. */
	dt_proc_t *dpr, **dpp = &dph->dph_hash[pid & (dph->dph_hashlen - 1)];

	for (dpr = *dpp; dpr != NULL; dpr = dpr->dpr_hash) {
		if (dpr->dpr_pid == pid)
			break;
		else
			dpp = &dpr->dpr_hash;
	}

	assert(dpr != NULL);
	assert(dpr->dpr_proc == P);

	if (remove)
		*dpp = dpr->dpr_hash; /* remove from pid hash chain */

	return (dpr);
}

/*
 * Tear down the dt_proc_t that owns P: pick the release mode from the
 * process flags, stop the control thread if there is one, unlink the entry
 * from the pid hash, notification list and LRU list, release the libproc
 * handle and free the structure.
 */
static void
dt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
	dt_proc_hash_t *dph = dtp->dt_procs;
	dt_proc_notify_t *npr, **npp;
	int rflag;

	assert(dpr != NULL);

	/*
	 * If neither PR_KLC nor PR_RLC is set, then the process is stopped by
	 * an external debugger and we were waiting in dt_proc_waitrun().
	 * Leave the process in this condition using PRELEASE_HANG.
	 */
#ifdef illumos
	if (!(Pstatus(dpr->dpr_proc)->pr_flags & (PR_KLC | PR_RLC))) {
#else
	if (!(proc_getflags(dpr->dpr_proc) & (PR_KLC | PR_RLC))) {
#endif
		dt_dprintf("abandoning pid %d\n", (int)dpr->dpr_pid);
		rflag = PRELEASE_HANG;
#ifdef illumos
	} else if (Pstatus(dpr->dpr_proc)->pr_flags & PR_KLC) {
#else
	} else if (proc_getflags(dpr->dpr_proc) & PR_KLC) {
#endif
		dt_dprintf("killing pid %d\n", (int)dpr->dpr_pid);
		rflag = PRELEASE_KILL; /* apply kill-on-last-close */
	} else {
		dt_dprintf("releasing pid %d\n", (int)dpr->dpr_pid);
		rflag = 0; /* apply run-on-last-close */
	}

	if (dpr->dpr_tid) {
		/*
		 * Set the dpr_quit flag to tell the daemon thread to exit. We
		 * send it a SIGCANCEL to poke it out of PCWSTOP or any other
		 * long-term /proc system call. Our daemon threads have POSIX
		 * cancellation disabled, so EINTR will be the only effect. We
		 * then wait for dpr_done to indicate the thread has exited.
		 *
		 * We can't use pthread_kill() to send SIGCANCEL because the
		 * interface forbids it and we can't use pthread_cancel()
		 * because with cancellation disabled it won't actually
		 * send SIGCANCEL to the target thread, so we use _lwp_kill()
		 * to do the job. This is all built on evil knowledge of
		 * the details of the cancellation mechanism in libc.
		 */
		(void) pthread_mutex_lock(&dpr->dpr_lock);
		dpr->dpr_quit = B_TRUE;
#ifdef illumos
		(void) _lwp_kill(dpr->dpr_tid, SIGCANCEL);
#else
		/* The non-illumos build uses pthread_kill() with SIGTHR instead. */
		pthread_kill(dpr->dpr_tid, SIGTHR);
#endif

		/*
		 * If the process is currently idling in dt_proc_stop(), re-
		 * enable breakpoints and poke it into running again.
		 */
		if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
			dt_proc_bpenable(dpr);
			dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
			(void) pthread_cond_broadcast(&dpr->dpr_cv);
		}

		/* dt_proc_control() sets dpr_done and broadcasts on dpr_cv. */
		while (!dpr->dpr_done)
			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);

		(void) pthread_mutex_unlock(&dpr->dpr_lock);
	}

	/*
	 * Before we free the process structure, remove this dt_proc_t from the
	 * lookup hash, and then walk the dt_proc_hash_t's notification list
	 * and remove this dt_proc_t if it is enqueued.
	 */
	(void) pthread_mutex_lock(&dph->dph_lock);
	(void) dt_proc_lookup(dtp, P, B_TRUE);
	npp = &dph->dph_notify;

	while ((npr = *npp) != NULL) {
		if (npr->dprn_dpr == dpr) {
			*npp = npr->dprn_next;
			dt_free(dtp, npr);
		} else {
			npp = &npr->dprn_next;
		}
	}

	(void) pthread_mutex_unlock(&dph->dph_lock);

	/*
	 * Remove the dt_proc_list from the LRU list, release the underlying
	 * libproc handle, and free our dt_proc_t data structure.
*/
	if (dpr->dpr_cacheable) {
		assert(dph->dph_lrucnt != 0);
		dph->dph_lrucnt--;
	}

	dt_list_delete(&dph->dph_lrulist, dpr);
	Prelease(dpr->dpr_proc, rflag);
	dt_free(dtp, dpr);
}

/*
 * Start a detached control thread running dt_proc_control() for dpr and wait
 * for it to reach its initial rendezvous. The stop bits are OR-ed into
 * dpr_stop before the thread is created.
 *
 * Returns 0 on success, with dpr_lock released. On failure a non-zero error
 * is returned and dt_proc_error() has already been called, which frees dpr;
 * the caller must not use dpr afterwards.
 */
static int
dt_proc_create_thread(dtrace_hdl_t *dtp, dt_proc_t *dpr, uint_t stop)
{
	dt_proc_control_data_t data;
	sigset_t nset, oset;
	pthread_attr_t a;
	int err;

	(void) pthread_mutex_lock(&dpr->dpr_lock);
	dpr->dpr_stop |= stop; /* set bit for initial rendezvous */

	(void) pthread_attr_init(&a);
	(void) pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);

	/*
	 * Build the signal mask that is in effect while the thread is created:
	 * everything blocked except the signals removed below.
	 *
	 * NOTE(review): the non-illumos branch unblocks SIGUSR1, while
	 * dt_proc_destroy() signals the thread with SIGTHR -- confirm that
	 * this mismatch is intended.
	 */
	(void) sigfillset(&nset);
	(void) sigdelset(&nset, SIGABRT);	/* unblocked for assert() */
#ifdef illumos
	(void) sigdelset(&nset, SIGCANCEL);	/* see dt_proc_destroy() */
#else
	(void) sigdelset(&nset, SIGUSR1);	/* see dt_proc_destroy() */
#endif

	data.dpcd_hdl = dtp;
	data.dpcd_proc = dpr;

	/* Apply the mask only around pthread_create(), then restore ours. */
	(void) pthread_sigmask(SIG_SETMASK, &nset, &oset);
	err = pthread_create(&dpr->dpr_tid, &a, dt_proc_control, &data);
	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);

	/*
	 * If the control thread was created, then wait on dpr_cv for either
	 * dpr_done to be set (the victim died or the control thread failed)
	 * or DT_PROC_STOP_IDLE to be set, indicating that the victim is now
	 * stopped by /proc and the control thread is at the rendezvous event.
	 * On success, we return with the process and control thread stopped:
	 * the caller can then apply dt_proc_continue() to resume both.
	 */
	if (err == 0) {
		while (!dpr->dpr_done && !(dpr->dpr_stop & DT_PROC_STOP_IDLE))
			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);

		/*
		 * If dpr_done is set, the control thread aborted before it
		 * reached the rendezvous event. This is either due to PS_LOST
		 * or PS_UNDEAD (i.e. the process died). We try to provide a
		 * small amount of useful information to help figure it out.
		 */
		if (dpr->dpr_done) {
#ifdef illumos
			const psinfo_t *prp = Ppsinfo(dpr->dpr_proc);
			int stat = prp ? prp->pr_wstat : 0;
			int pid = dpr->dpr_pid;
#else
			int stat = proc_getwstat(dpr->dpr_proc);
			int pid = proc_getpid(dpr->dpr_proc);
#endif
			if (proc_state(dpr->dpr_proc) == PS_LOST) {
				(void) dt_proc_error(dpr->dpr_hdl, dpr,
				    "failed to control pid %d: process exec'd "
				    "set-id or unobservable program\n", pid);
			} else if (WIFSIGNALED(stat)) {
				(void) dt_proc_error(dpr->dpr_hdl, dpr,
				    "failed to control pid %d: process died "
				    "from signal %d\n", pid, WTERMSIG(stat));
			} else {
				(void) dt_proc_error(dpr->dpr_hdl, dpr,
				    "failed to control pid %d: process exited "
				    "with status %d\n", pid, WEXITSTATUS(stat));
			}

			err = ESRCH; /* cause grab() or create() to fail */
		}
	} else {
		(void) dt_proc_error(dpr->dpr_hdl, dpr,
		    "failed to create control thread for process-id %d: %s\n",
		    (int)dpr->dpr_pid, strerror(err));
	}

	/*
	 * On every failure path dt_proc_error() has freed dpr, so the lock is
	 * only released when we succeeded.
	 */
	if (err == 0)
		(void) pthread_mutex_unlock(&dpr->dpr_lock);
	(void) pthread_attr_destroy(&a);

	return (err);
}

/*
 * Create a new victim process running file with argv, mark it kill-on-last-
 * close, start its control thread with the handle's dt_prcmode stop bits,
 * and enter it in the pid hash and LRU list with one reference.
 *
 * pcf and child_arg are only used by the non-illumos build, where they are
 * passed through to proc_create(). Returns the libproc handle, or NULL on
 * failure.
 */
struct ps_prochandle *
dt_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv,
    proc_child_func *pcf, void *child_arg)
{
	dt_proc_hash_t *dph = dtp->dt_procs;
	dt_proc_t *dpr;
	int err;

	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
		return (NULL); /* errno is set for us */

	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
	(void) pthread_cond_init(&dpr->dpr_cv, NULL);

#ifdef illumos
	if ((dpr->dpr_proc = Pcreate(file, argv, &err, NULL, 0)) == NULL) {
#else
	if ((err = proc_create(file, argv, pcf, child_arg,
	    &dpr->dpr_proc)) != 0) {
#endif
		return (dt_proc_error(dtp, dpr,
		    "failed to execute %s: %s\n", file, Pcreate_error(err)));
	}

	dpr->dpr_hdl = dtp;
#ifdef illumos
	dpr->dpr_pid = Pstatus(dpr->dpr_proc)->pr_pid;
#else
	dpr->dpr_pid = proc_getpid(dpr->dpr_proc);
#endif

	/* We created it: kill-on-last-close rather than run-on-last-close. */
	(void) Punsetflags(dpr->dpr_proc, PR_RLC);
	(void) Psetflags(dpr->dpr_proc, PR_KLC);

	if (dt_proc_create_thread(dtp, dpr, dtp->dt_prcmode) != 0)
		return (NULL); /* dt_proc_error() has been called for us */

	/* Push onto the front of the pid hash chain and the LRU list. */
	dpr->dpr_hash = dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)];
	dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)] = dpr;
	dt_list_prepend(&dph->dph_lrulist, dpr);

	dt_dprintf("created pid %d\n", (int)dpr->dpr_pid);
	dpr->dpr_refs++;

	return (dpr->dpr_proc);
}

/*
 * Grab an existing process by pid, reusing a cached, non-stale handle when
 * one exists. A cached read-only handle is marked stale and replaced when
 * the request is for a writable one. Returns the libproc handle with its
 * reference count incremented, or NULL on failure.
 */
struct ps_prochandle *
dt_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags, int nomonitor)
{
	dt_proc_hash_t *dph = dtp->dt_procs;
	uint_t h = pid & (dph->dph_hashlen - 1);
	dt_proc_t *dpr, *opr;
	int err;

	/*
	 * Search the hash table for the pid. If it is already grabbed or
	 * created, move the handle to the front of the lrulist, increment
	 * the reference count, and return the existing ps_prochandle.
	 */
	for (dpr = dph->dph_hash[h]; dpr != NULL; dpr = dpr->dpr_hash) {
		if (dpr->dpr_pid == pid && !dpr->dpr_stale) {
			/*
			 * If the cached handle was opened read-only and
			 * this request is for a writeable handle, mark
			 * the cached handle as stale and open a new handle.
			 * Since it's stale, unmark it as cacheable.
			 */
			if (dpr->dpr_rdonly && !(flags & PGRAB_RDONLY)) {
				dt_dprintf("upgrading pid %d\n", (int)pid);
				dpr->dpr_stale = B_TRUE;
				dpr->dpr_cacheable = B_FALSE;
				dph->dph_lrucnt--;
				break;
			}

			dt_dprintf("grabbed pid %d (cached)\n", (int)pid);
			dt_list_delete(&dph->dph_lrulist, dpr);
			dt_list_prepend(&dph->dph_lrulist, dpr);
			dpr->dpr_refs++;
			return (dpr->dpr_proc);
		}
	}

	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
		return (NULL); /* errno is set for us */

	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
	(void) pthread_cond_init(&dpr->dpr_cv, NULL);

#ifdef illumos
	if ((dpr->dpr_proc = Pgrab(pid, flags, &err)) == NULL) {
#else
	if ((err = proc_attach(pid, flags, &dpr->dpr_proc)) != 0) {
#endif
		return (dt_proc_error(dtp, dpr,
		    "failed to grab pid %d: %s\n", (int)pid, Pgrab_error(err)));
	}

	dpr->dpr_hdl = dtp;
	dpr->dpr_pid = pid;

	/* We grabbed it: run-on-last-close rather than kill-on-last-close. */
	(void) Punsetflags(dpr->dpr_proc, PR_KLC);
	(void) Psetflags(dpr->dpr_proc, PR_RLC);

	/*
	 * If we are attempting to grab the process without a monitor
	 * thread, then mark the process cacheable only if it's being
	 * grabbed read-only.
If we're currently caching more process
	 * handles than dph_lrulim permits, attempt to find the
	 * least-recently-used handle that is currently unreferenced and
	 * release it from the cache. Otherwise we are grabbing the process
	 * for control: create a control thread for this process and store
	 * its ID in dpr->dpr_tid.
	 */
	if (nomonitor || (flags & PGRAB_RDONLY)) {
		if (dph->dph_lrucnt >= dph->dph_lrulim) {
			for (opr = dt_list_prev(&dph->dph_lrulist);
			    opr != NULL; opr = dt_list_prev(opr)) {
				if (opr->dpr_cacheable && opr->dpr_refs == 0) {
					dt_proc_destroy(dtp, opr->dpr_proc);
					break;
				}
			}
		}

		if (flags & PGRAB_RDONLY) {
			dpr->dpr_cacheable = B_TRUE;
			dpr->dpr_rdonly = B_TRUE;
			dph->dph_lrucnt++;
		}

	} else if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_GRAB) != 0)
		return (NULL); /* dt_proc_error() has been called for us */

	/* Push onto the front of the pid hash chain and the LRU list. */
	dpr->dpr_hash = dph->dph_hash[h];
	dph->dph_hash[h] = dpr;
	dt_list_prepend(&dph->dph_lrulist, dpr);

	dt_dprintf("grabbed pid %d\n", (int)pid);
	dpr->dpr_refs++;

	return (dpr->dpr_proc);
}

/*
 * Drop one reference on the dt_proc_t that owns P. When the count reaches
 * zero the handle is destroyed unless it is cacheable and the cache count
 * does not exceed dph_lrulim.
 */
void
dt_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
	dt_proc_hash_t *dph = dtp->dt_procs;

	assert(dpr != NULL);
	assert(dpr->dpr_refs != 0);

	if (--dpr->dpr_refs == 0 &&
	    (!dpr->dpr_cacheable || dph->dph_lrucnt > dph->dph_lrulim))
		dt_proc_destroy(dtp, P);
}

/*
 * If the dt_proc_t that owns P has DT_PROC_STOP_IDLE set, clear it and wake
 * every waiter on dpr_cv; otherwise do nothing.
 */
void
dt_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);

	(void) pthread_mutex_lock(&dpr->dpr_lock);

	if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
		dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
		(void) pthread_cond_broadcast(&dpr->dpr_cv);
	}

	(void) pthread_mutex_unlock(&dpr->dpr_lock);
}

/*
 * Acquire dpr_lock for the dt_proc_t that owns P. The result of the lock
 * call is checked only through assert().
 */
void
dt_proc_lock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
	int err = pthread_mutex_lock(&dpr->dpr_lock);
	assert(err == 0); /* check for recursion */
}

/*
 * Release dpr_lock for the dt_proc_t that owns P. The result of the unlock
 * call is checked only through assert().
 */
void
dt_proc_unlock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
	int err = pthread_mutex_unlock(&dpr->dpr_lock);
	assert(err == 0); /* check for unheld lock */
}

/*
 * Allocate and initialize dtp->dt_procs with _dtrace_pidbuckets hash chains
 * and an LRU limit of _dtrace_pidlrulim. If the allocation fails,
 * dtp->dt_procs is left NULL and nothing else is done.
 *
 * NOTE(review): because `*` binds tighter than `-`, the size expression
 * below subtracts one byte rather than one pointer slot. If dt_proc_hash_t
 * ends in a one-element dph_hash array this merely over-allocates slightly
 * -- confirm against the structure definition before changing it.
 */
void
dt_proc_hash_create(dtrace_hdl_t *dtp)
{
	if ((dtp->dt_procs = dt_zalloc(dtp, sizeof (dt_proc_hash_t) +
	    sizeof (dt_proc_t *) * _dtrace_pidbuckets - 1)) != NULL) {

		(void) pthread_mutex_init(&dtp->dt_procs->dph_lock, NULL);
		(void) pthread_cond_init(&dtp->dt_procs->dph_cv, NULL);

		dtp->dt_procs->dph_hashlen = _dtrace_pidbuckets;
		dtp->dt_procs->dph_lrulim = _dtrace_pidlrulim;
	}
}

/*
 * Destroy every dt_proc_t that dt_list_next() still returns for the LRU
 * list, then clear dtp->dt_procs and free the hash structure itself.
 */
void
dt_proc_hash_destroy(dtrace_hdl_t *dtp)
{
	dt_proc_hash_t *dph = dtp->dt_procs;
	dt_proc_t *dpr;

	while ((dpr = dt_list_next(&dph->dph_lrulist)) != NULL)
		dt_proc_destroy(dtp, dpr->dpr_proc);

	dtp->dt_procs = NULL;
	dt_free(dtp, dph);
}

/*
 * Public wrapper around dt_proc_create(). On success, if the "target" macro
 * identifier exists and its id is still 0, it is set to the new pid.
 */
struct ps_prochandle *
dtrace_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv,
    proc_child_func *pcf, void *child_arg)
{
	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
	struct ps_prochandle *P = dt_proc_create(dtp, file, argv, pcf, child_arg);

	if (P != NULL && idp != NULL && idp->di_id == 0) {
#ifdef illumos
		idp->di_id = Pstatus(P)->pr_pid; /* $target = created pid */
#else
		idp->di_id = proc_getpid(P); /* $target = created pid */
#endif
	}

	return (P);
}

/*
 * Public wrapper around dt_proc_grab(), always requesting a monitor thread
 * (nomonitor is 0). On success, if the "target" macro identifier exists and
 * its id is still 0, it is set to pid.
 */
struct ps_prochandle *
dtrace_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags)
{
	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
	struct ps_prochandle *P = dt_proc_grab(dtp, pid, flags, 0);

	if (P != NULL && idp != NULL && idp->di_id == 0)
		idp->di_id = pid; /* $target = grabbed pid */

	return (P);
}

/*
 * Public wrapper around dt_proc_release().
 */
void
dtrace_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_release(dtp, P);
}

/*
 * Public wrapper around dt_proc_continue().
 */
void
dtrace_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
{
	dt_proc_continue(dtp, P);
}
Index: stable/11
===================================================================
--- stable/11 (revision 326301)
+++ stable/11 (revision 326302)
Property changes on: stable/11
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged
/head:r325042