From c86760c1e925e03680ab5754ac2dd95108032e53 Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Mon, 25 May 2020 17:27:01 +0200 Subject: [PATCH] macos: Read exported symbols from dylibs up to now we simply assumed that any undefined symbols will be provided by some shared libs on the cmdline. Now we check this. --- libtcc.c | 3 +- tcc.h | 3 + tccelf.c | 4 +- tccmacho.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 168 insertions(+), 21 deletions(-) diff --git a/libtcc.c b/libtcc.c index 4fef0a09..7c485b9b 100644 --- a/libtcc.c +++ b/libtcc.c @@ -1124,7 +1124,8 @@ ST_FUNC int tcc_add_file_internal(TCCState *s1, const char *filename, int flags) ret = tcc_load_dll(s1, fd, filename, (flags & AFF_REFERENCED_DLL) != 0); #else - ret = 0; + ret = macho_load_dll(s1, fd, filename, + (flags & AFF_REFERENCED_DLL) != 0); #endif } break; diff --git a/tcc.h b/tcc.h index 89c33e98..d28e02b3 100644 --- a/tcc.h +++ b/tcc.h @@ -1531,6 +1531,8 @@ ST_FUNC void resolve_common_syms(TCCState *s1); ST_FUNC void relocate_syms(TCCState *s1, Section *symtab, int do_resolve); ST_FUNC void relocate_section(TCCState *s1, Section *s); +ST_FUNC ssize_t full_read(int fd, void *buf, size_t count); +ST_FUNC void *load_data(int fd, unsigned long file_offset, unsigned long size); ST_FUNC int tcc_object_type(int fd, ElfW(Ehdr) *h); ST_FUNC int tcc_load_object_file(TCCState *s1, int fd, unsigned long file_offset); ST_FUNC int tcc_load_archive(TCCState *s1, int fd, int alacarte); @@ -1747,6 +1749,7 @@ PUB_FUNC int tcc_get_dllexports(const char *filename, char **pp); /* ------------ tccmacho.c ----------------- */ #ifdef TCC_TARGET_MACHO ST_FUNC int macho_output_file(TCCState * s1, const char *filename); +ST_FUNC int macho_load_dll(TCCState *s1, int fd, const char *filename, int lev); #endif /* ------------ tccrun.c ----------------- */ #ifdef TCC_IS_NATIVE diff --git a/tccelf.c b/tccelf.c index bddd13ea..8d3f9653 100644 --- a/tccelf.c +++ b/tccelf.c @@ -2547,7 +2547,7 @@ 
LIBTCCAPI int tcc_output_file(TCCState *s, const char *filename) return ret; } -ssize_t full_read(int fd, void *buf, size_t count) { +ST_FUNC ssize_t full_read(int fd, void *buf, size_t count) { char *cbuf = buf; size_t rnum = 0; while (1) { @@ -2559,7 +2559,7 @@ ssize_t full_read(int fd, void *buf, size_t count) { } } -static void *load_data(int fd, unsigned long file_offset, unsigned long size) +ST_FUNC void *load_data(int fd, unsigned long file_offset, unsigned long size) { void *data; diff --git a/tccmacho.c b/tccmacho.c index 18ddb48d..3a9dd77d 100644 --- a/tccmacho.c +++ b/tccmacho.c @@ -18,6 +18,7 @@ #include "tcc.h" #define DEBUG_MACHO 0 +#define dprintf if (DEBUG_MACHO) printf struct mach_header { uint32_t magic; /* mach magic number identifier */ @@ -231,7 +232,9 @@ struct macho { #define LC_SYMTAB 0x2 #define LC_DYSYMTAB 0xb #define LC_LOAD_DYLIB 0xc +#define LC_ID_DYLIB 0xd #define LC_LOAD_DYLINKER 0xe +#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) #define LC_MAIN (0x28|LC_REQ_DYLD) /* Hack for now, 46_grep.c needs fopen, but due to aliasing games @@ -375,10 +378,9 @@ static int check_symbols(TCCState *s1, struct macho *mo) unsigned bind = ELFW(ST_BIND)(sym->st_info); unsigned vis = ELFW(ST_VISIBILITY)(sym->st_other); - if (DEBUG_MACHO) - printf("%4d (%4d): %09llx %4d %4d %4d %3d %s\n", - sym_index, elf_index, sym->st_value, - type, bind, vis, sym->st_shndx, name); + dprintf("%4d (%4d): %09llx %4d %4d %4d %3d %s\n", + sym_index, elf_index, sym->st_value, + type, bind, vis, sym->st_shndx, name); if (bind == STB_LOCAL) { if (mo->ilocal == -1) mo->ilocal = sym_index - 1; @@ -394,15 +396,12 @@ static int check_symbols(TCCState *s1, struct macho *mo) mo->iundef = sym_index - 1; if (ELFW(ST_BIND)(sym->st_info) == STB_WEAK) continue; - if (get_sym_attr(s1, elf_index, 0)) { + if (find_elf_sym(s1->dynsymtab_section, name)) { /* Mark the symbol as coming from a dylib so that - relocate_syms doesn't complain. 
Normally we would have - checked that any dylib in fact exports this one, and listed - those in s1->dynsymtab, which would cause us to enter this - symbol into s1->dynsym, which is checked by relocate_syms. - But for now we fake this and regard all undefined as - coming from a dylib, and we don't use the dynsymtab/dynsym - scheme. */ + relocate_syms doesn't complain. Normally bind_exe_dynsyms + would do this check, and place the symbol into dynsym + which is checked by relocate_syms. But Mach-O doesn't use + bind_exe_dynsyms. */ sym->st_shndx = SHN_FROMDLL; continue; } @@ -589,7 +588,11 @@ static void collect_sections(TCCState *s1, struct macho *mo) dysymlc->cmdsize = sizeof(*dysymlc); add_lc(mo, dysymlc); - add_dylib(mo, "/usr/lib/libSystem.B.dylib"); + for(i = 0; i < s1->nb_loaded_dlls; i++) { + DLLReference *dllref = s1->loaded_dlls[i]; + if (dllref->level == 0) + add_dylib(mo, dllref->name); + } mo->linkedit = new_section(s1, "LINKEDIT", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE); /* LINKEDIT can't be empty (XXX remove once we have symbol table) */ @@ -683,16 +686,14 @@ static void collect_sections(TCCState *s1, struct macho *mo) for (s = mo->sk_to_sect[sk].s; s; s = s->prev) { al = s->sh_addralign; curaddr = (curaddr + al - 1) & -al; - if (DEBUG_MACHO) - printf("curaddr now 0x%llx\n", curaddr); + dprintf("curaddr now 0x%llx\n", curaddr); s->sh_addr = curaddr; curaddr += s->sh_size; if (s->sh_type != SHT_NOBITS) { fileofs = (fileofs + al - 1) & -al; s->sh_offset = fileofs; fileofs += s->sh_size; - if (DEBUG_MACHO) - printf("fileofs now %lld\n", fileofs); + dprintf("fileofs now %lld\n", fileofs); } if (sec) mo->elfsectomacho[s->sh_num] = numsec; @@ -799,7 +800,7 @@ ST_FUNC int macho_output_file(TCCState *s1, const char *filename) int fd, mode, file_type; FILE *fp; int ret = -1; - struct macho mo = {0,}; + struct macho mo = {}; file_type = s1->output_type; if (file_type == TCC_OUTPUT_OBJ) @@ -845,3 +846,145 @@ ST_FUNC int macho_output_file(TCCState *s1, const char 
*filename) fclose(fp); return ret; } +/* Magic numbers of universal ("fat") Mach-O files. Each CIGAM value is the matching MAGIC value with its four bytes reversed. */ +#define FAT_MAGIC 0xcafebabe +#define FAT_CIGAM 0xbebafeca +#define FAT_MAGIC_64 0xcafebabf +#define FAT_CIGAM_64 0xbfbafeca +/* Leading header of a fat file; macho_load_dll loads the fat_arch array from file offset sizeof(struct fat_header). */ +struct fat_header { + uint32_t magic; /* FAT_MAGIC or FAT_MAGIC_64 */ + uint32_t nfat_arch; /* number of structs that follow */ +}; +/* Describes one object file stored inside a fat file. */ +struct fat_arch { + int cputype; /* cpu specifier (int) */ + int cpusubtype; /* machine specifier (int) */ + uint32_t offset; /* file offset to this object file */ + uint32_t size; /* size of this object file */ + uint32_t align; /* alignment as a power of 2 */ +}; +/* Yields ntohl(x) when the variable swap of the enclosing scope is non-zero, and x unchanged otherwise. */ +#define SWAP(x) (swap ? ntohl(x) : (x)) +ST_FUNC int macho_load_dll(TCCState *s1, int fd, const char *filename, int lev) +{ /* Reads the Mach-O dylib open on fd and adds its exported symbols to s1->dynsymtab_section as undefined global symbols. For a fat file the x86-64 entry is located first and parsing restarts at its offset. The load commands are scanned for the symbol and string tables (LC_SYMTAB), the library name (LC_ID_DYLIB, falling back to filename), the exported range (LC_DYSYMTAB) and re-exported dylibs (LC_REEXPORT_DYLIB), which are opened and loaded recursively with level lev + 1. The library is appended to s1->loaded_dlls unless an entry with the same name exists, in which case only that entry's level may be lowered to lev and no symbols are added. Returns -1 when the header cannot be read, no matching fat entry exists, the file is a 64-bit fat file, or the magic is not MH_MAGIC_64; returns 0 otherwise. */ + unsigned char buf[sizeof(struct mach_header_64)]; + void *buf2; + uint32_t machofs = 0; + struct fat_header fh; + struct mach_header mh; + struct load_command *lc; + int i, swap = 0; + const char *soname = filename; + struct nlist_64 *symtab = 0; + uint32_t nsyms = 0; + char *strtab = 0; + uint32_t strsize = 0; + uint32_t iextdef = 0; + uint32_t nextdef = 0; + DLLReference *dllref; + /* Jumped back to after seeking to the selected entry of a fat file. */ + again: + if (full_read(fd, buf, sizeof(buf)) != sizeof(buf)) + return -1; + memcpy(&fh, buf, sizeof(fh)); + if (fh.magic == FAT_MAGIC || fh.magic == FAT_CIGAM) { + struct fat_arch *fa = load_data(fd, sizeof(fh), + fh.nfat_arch * sizeof(*fa)); /* NOTE(review): nfat_arch is used unswapped here because swap is only assigned on the next line, while every later use goes through SWAP(); for a FAT_CIGAM file the requested size looks byte-reversed - confirm */ + swap = fh.magic == FAT_CIGAM; + for (i = 0; i < SWAP(fh.nfat_arch); i++) + if (SWAP(fa[i].cputype) == 0x01000007 /* CPU_TYPE_X86_64 */ + && SWAP(fa[i].cpusubtype) == 3) /* CPU_SUBTYPE_X86_ALL */ + break; + if (i == SWAP(fh.nfat_arch)) { + tcc_free(fa); + return -1; + } + machofs = SWAP(fa[i].offset); + tcc_free(fa); + lseek(fd, machofs, SEEK_SET); + goto again; + } else if (fh.magic == FAT_MAGIC_64 || fh.magic == FAT_CIGAM_64) { + tcc_warning("%s: Mach-O fat 64bit files of type 0x%x not handled", + filename, fh.magic); + return -1; + } + /* Not a fat file: the bytes just read are interpreted as a Mach-O header and must carry the 64-bit magic. */ + memcpy(&mh, buf, sizeof(mh)); + if (mh.magic != MH_MAGIC_64) + return -1; + dprintf("found Mach-O at %d\n", machofs); +
buf2 = load_data(fd, machofs + sizeof(struct mach_header_64), mh.sizeofcmds); /* the load commands are read from directly behind the 64-bit header; buf2 is released at the_end */ + for (i = 0, lc = buf2; i < mh.ncmds; i++) { /* NOTE(review): lc advances by lc->cmdsize and is never checked against mh.sizeofcmds, so the walk trusts the ncmds and cmdsize values of the file */ + dprintf("lc %2d: 0x%08x\n", i, lc->cmd); + if (lc->cmd == LC_SYMTAB) { + struct symtab_command *sc = (struct symtab_command*)lc; + nsyms = sc->nsyms; + symtab = load_data(fd, machofs + sc->symoff, nsyms * sizeof(*symtab)); + strsize = sc->strsize; + strtab = load_data(fd, machofs + sc->stroff, strsize); + } else if (lc->cmd == LC_ID_DYLIB) { + struct dylib_command *dc = (struct dylib_command*)lc; + soname = (char*)lc + dc->name; /* dc->name is used as a byte offset from the start of the command, so soname points into buf2 */ + dprintf(" ID_DYLIB %d 0x%x 0x%x %s\n", + dc->timestamp, dc->current_version, + dc->compatibility_version, soname); + } else if (lc->cmd == LC_REEXPORT_DYLIB) { + struct dylib_command *dc = (struct dylib_command*)lc; + char *name = (char*)lc + dc->name; + dprintf(" REEXPORT %s\n", name); + int subfd = open(name, O_RDONLY | O_BINARY); + if (subfd < 0) + tcc_warning("can't open %s (reexported from %s)", name, filename); + else { + /* Hopefully the REEXPORTs never form a cycle, we don't check + for that!
*/ + macho_load_dll(s1, subfd, name, lev + 1); /* the result of the nested load is ignored */ + close(subfd); + } + } else if (lc->cmd == LC_DYSYMTAB) { + struct dysymtab_command *dc = (struct dysymtab_command*)lc; + iextdef = dc->iextdefsym; + nextdef = dc->nextdefsym; /* first index and count of the symtab entries walked by the export loop below */ + } + lc = (struct load_command*) ((char*)lc + lc->cmdsize); + } + + /* if the dll is already loaded, do not load it */ + for(i = 0; i < s1->nb_loaded_dlls; i++) { + dllref = s1->loaded_dlls[i]; + if (!strcmp(soname, dllref->name)) { + /* but update level if needed */ + if (lev < dllref->level) + dllref->level = lev; + goto the_end; + } + } + dllref = tcc_mallocz(sizeof(DLLReference) + strlen(soname)); /* assumes DLLReference itself reserves the byte for the terminating NUL of name - TODO confirm against tcc.h */ + dllref->level = lev; + strcpy(dllref->name, soname); + dynarray_add(&s1->loaded_dlls, &s1->nb_loaded_dlls, dllref); + + if (!nsyms || !nextdef) + tcc_warning("%s doesn't export any symbols?", filename); /* NOTE(review): this only warns; if no LC_SYMTAB was seen while nextdef is non-zero, the loop below indexes symtab and strtab while they are still null, and iextdef + nextdef is not checked against nsyms */ + + //dprintf("symbols (all):\n"); + dprintf("symbols (exported):\n"); + dprintf(" n: typ sec desc value name\n"); + //for (i = 0; i < nsyms; i++) { + for (i = iextdef; i < iextdef + nextdef; i++) { + struct nlist_64 *sym = symtab + i; + dprintf("%5d: %3d %3d 0x%04x 0x%016llx %s\n", + i, sym->n_type, sym->n_sect, sym->n_desc, sym->n_value, + strtab + sym->n_strx); + set_elf_sym(s1->dynsymtab_section, 0, 0, + ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), + 0, SHN_UNDEF, strtab + sym->n_strx); + } + + the_end: + tcc_free(strtab); + tcc_free(symtab); + tcc_free(buf2); + return 0; +}