Index: sys/arm/broadcom/bcm2835/bcm2835_audio.c =================================================================== --- sys/arm/broadcom/bcm2835/bcm2835_audio.c +++ sys/arm/broadcom/bcm2835/bcm2835_audio.c @@ -116,6 +116,12 @@ uint64_t retrieved_samples; uint64_t underruns; int starved; + struct bcm_log_vars { + unsigned int bsize ; + int slept_for_lack_of_space ; + } log_vars; +#define DEFAULT_LOG_VALUES \ + ((struct bcm_log_vars) { .bsize = 0 , .slept_for_lack_of_space = 0 }) }; struct bcm2835_audio_info { @@ -135,6 +141,7 @@ uint32_t flags_pending; + int verbose_trace; /* Worker thread state */ int worker_state; }; @@ -143,6 +150,35 @@ #define BCM2835_AUDIO_LOCKED(sc) mtx_assert(&(sc)->lock, MA_OWNED) #define BCM2835_AUDIO_UNLOCK(sc) mtx_unlock(&(sc)->lock) +/* things that really have to be reported */ +#define REPORT_ERROR(sc,...) \ + do{ device_printf((sc)->dev,__VA_ARGS__); }while(0) +/* things that shouldn't clobber the output (printed when verbose_trace > 0) */ +#define INFORM_THAT(sc,...) \ + do { \ + if((sc)->verbose_trace>0){ \ + device_printf((sc)->dev,__VA_ARGS__); \ + } \ + }while(0) +/* things that might clobber the output (printed when verbose_trace > 1) */ +#define WARN_THAT(sc,...) \ + do { \ + if((sc)->verbose_trace>1){ \ + device_printf((sc)->dev,__VA_ARGS__); \ + } \ + }while(0) +/* things that are expected to (will) clobber the output */ +#define TRACE(sc,...)
\ + do { \ + if((sc)->verbose_trace>2){ \ + device_printf((sc)->dev,__VA_ARGS__); \ + } \ + }while(0) + +/* Useful for circular buffer calcs */ +#define MOD_DIFF(front,rear,mod) (((mod) + (front) - (rear)) % (mod)) + + static const char * dest_description(uint32_t dest) { @@ -216,10 +252,21 @@ m.type); } } else if (m.type == VC_AUDIO_MSG_TYPE_COMPLETE) { - struct bcm2835_audio_chinfo *ch = m.u.complete.cookie; + unsigned int signaled = 0; uintmax_t starved_total = 0; + struct bcm2835_audio_chinfo *ch ; +#if defined(__aarch64__) + ch = (void *) ((((size_t)m.u.complete.callback) << 32) + | ((size_t)m.u.complete.cookie)); +#else + ch = (void *) (m.u.complete.cookie); +#endif + int count = m.u.complete.count & 0xffff; int perr = (m.u.complete.count & (1U << 30)) != 0; + + TRACE(sc,"in:: count:0x%x perr:%d\n",m.u.complete.count,perr); + ch->callbacks++; if (perr) ch->underruns++; @@ -239,18 +286,41 @@ device_printf(sc->dev, "available_space == %d, count = %d, perr=%d\n", ch->available_space, count, perr); device_printf(sc->dev, - "retrieved_samples = %lld, submitted_samples = %lld\n", - ch->retrieved_samples, ch->submitted_samples); + "retrieved_samples = %ju, submitted_samples = %ju\n", + (uintmax_t) ch->retrieved_samples, (uintmax_t) ch->submitted_samples); } - ch->available_space += count; - ch->retrieved_samples += count; } - if (perr || (ch->available_space >= VCHIQ_AUDIO_PACKET_SIZE)) - cv_signal(&sc->worker_cv); + ch->available_space += count; + ch->retrieved_samples += count; + /* + * XXXMDC + * Experimental: if VC says it's empty, believe it + * Has to come after the usual adjustments + */ + if(perr){ + ch->available_space = VCHIQ_AUDIO_BUFFER_SIZE; + starved_total = ch->retrieved_samples; /* perr stays a pure flag; the 64-bit total is not truncated */ + } + + if ((ch->available_space >= 1*VCHIQ_AUDIO_PACKET_SIZE)){ + cv_signal(&sc->worker_cv); + signaled = 1; + } } BCM2835_AUDIO_UNLOCK(sc); + if(perr){ + WARN_THAT(sc, + "VC starved; reported %d for a total of %ju\n" + "worker %s\n" , + count,starved_total, + (signaled ?
"signaled": "not signaled") + ); + } } else - printf("%s: unknown m.type: %d\n", __func__, m.type); + WARN_THAT(sc, + "%s: unknown m.type: %d\n", + __func__, m.type + ); } /* VCHIQ stuff */ @@ -262,13 +332,13 @@ /* Initialize and create a VCHI connection */ status = vchi_initialise(&sc->vchi_instance); if (status != 0) { - printf("vchi_initialise failed: %d\n", status); + REPORT_ERROR(sc,"vchi_initialise failed: %d\n", status); return; } status = vchi_connect(NULL, 0, sc->vchi_instance); if (status != 0) { - printf("vchi_connect failed: %d\n", status); + REPORT_ERROR(sc,"vchi_connect failed: %d\n", status); return; } @@ -300,7 +370,7 @@ if (sc->vchi_handle != VCHIQ_SERVICE_HANDLE_INVALID) { success = vchi_service_close(sc->vchi_handle); if (success != 0) - printf("vchi_service_close failed: %d\n", success); + REPORT_ERROR(sc,"vchi_service_close failed: %d\n", success); vchi_service_release(sc->vchi_handle); sc->vchi_handle = VCHIQ_SERVICE_HANDLE_INVALID; } @@ -330,7 +400,10 @@ &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed (err %d)\n", __func__, ret); + REPORT_ERROR(sc, + "%s: vchi_msg_queue failed (err %d)\n", + __func__, ret + ); } } @@ -345,11 +418,15 @@ m.type = VC_AUDIO_MSG_TYPE_STOP; m.u.stop.draining = 0; + INFORM_THAT(sc,"sending stop\n"); ret = vchi_msg_queue(sc->vchi_handle, &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed (err %d)\n", __func__, ret); + REPORT_ERROR(sc, + "%s: vchi_msg_queue failed (err %d)\n", + __func__, ret + ); } } @@ -365,7 +442,10 @@ &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed (err %d)\n", __func__, ret); + REPORT_ERROR(sc, + "%s: vchi_msg_queue failed (err %d)\n", + __func__, ret + ); } } @@ -387,7 +467,10 @@ &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed (err %d)\n", __func__, ret); + REPORT_ERROR(sc, + "%s: 
vchi_msg_queue failed (err %d)\n", + __func__, ret + ); } } @@ -407,7 +490,10 @@ &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed (err %d)\n", __func__, ret); + REPORT_ERROR(sc, + "%s: vchi_msg_queue failed (err %d)\n", + __func__, ret + ); } } @@ -415,18 +501,25 @@ bcm2835_audio_buffer_should_sleep(struct bcm2835_audio_chinfo *ch) { + ch->log_vars.slept_for_lack_of_space = 0; if (ch->playback_state != PLAYBACK_PLAYING) return (true); /* Not enough data */ - if (sndbuf_getready(ch->buffer) < VCHIQ_AUDIO_PACKET_SIZE) { - printf("starve\n"); + /* XXXMDC Take unsubmitted stuff into account */ + if (sndbuf_getready(ch->buffer) + - MOD_DIFF( + ch->unsubmittedptr, + sndbuf_getreadyptr(ch->buffer), + sndbuf_getsize(ch->buffer) + ) < VCHIQ_AUDIO_PACKET_SIZE) { ch->starved++; return (true); } /* Not enough free space */ if (ch->available_space < VCHIQ_AUDIO_PACKET_SIZE) { + ch->log_vars.slept_for_lack_of_space = 1; return (true); } @@ -447,22 +540,27 @@ m.type = VC_AUDIO_MSG_TYPE_WRITE; m.u.write.count = count; m.u.write.max_packet = VCHIQ_AUDIO_PACKET_SIZE; - m.u.write.callback = NULL; - m.u.write.cookie = ch; +#if defined(__aarch64__) + m.u.write.callback = (uint32_t)(((size_t) ch) >> 32) & 0xffffffff; + m.u.write.cookie = (uint32_t)(((size_t) ch) & 0xffffffff); +#else + m.u.write.callback = (uint32_t) NULL; + m.u.write.cookie = (uint32_t) ch; +#endif m.u.write.silence = 0; ret = vchi_msg_queue(sc->vchi_handle, &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed (err %d)\n", __func__, ret); + REPORT_ERROR(sc,"%s: vchi_msg_queue failed (err %d)\n", __func__, ret); while (count > 0) { int bytes = MIN((int)m.u.write.max_packet, (int)count); ret = vchi_msg_queue(sc->vchi_handle, buf, bytes, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL); if (ret != 0) - printf("%s: vchi_msg_queue failed: %d\n", + REPORT_ERROR(sc,"%s: vchi_msg_queue failed: %d\n", __func__, ret); buf = (char *)buf + 
bytes; count -= bytes; @@ -494,6 +592,10 @@ while ((sc->flags_pending == 0) && bcm2835_audio_buffer_should_sleep(ch)) { cv_wait_sig(&sc->worker_cv, &sc->lock); + if((sc-> flags_pending == 0) + && ch->log_vars.slept_for_lack_of_space) { + TRACE(sc,"slept for lack of space\n"); + } } flags = sc->flags_pending; /* Clear pending flags */ @@ -520,16 +622,32 @@ BCM2835_AUDIO_LOCK(sc); bcm2835_audio_reset_channel(&sc->pch); ch->playback_state = PLAYBACK_IDLE; + uintmax_t sub_total = ch->submitted_samples; /* 64-bit counter: long would truncate on 32-bit arm */ + uintmax_t retd = ch->retrieved_samples; BCM2835_AUDIO_UNLOCK(sc); + INFORM_THAT(sc, + "stopped audio. submitted a total of %ju " + "having been acked %ju\n", + sub_total, retd + ); continue; } /* Requested to start playback */ if ((flags & AUDIO_PLAY) && (ch->playback_state == PLAYBACK_IDLE)) { + INFORM_THAT(sc, + "starting audio\n" + ); + unsigned int bsize = sndbuf_getsize(ch->buffer); BCM2835_AUDIO_LOCK(sc); ch->playback_state = PLAYBACK_PLAYING; + ch->log_vars.bsize = bsize; BCM2835_AUDIO_UNLOCK(sc); + INFORM_THAT(sc, + "buffer size is %u\n", + bsize + ); bcm2835_audio_start(ch); } @@ -538,20 +656,69 @@ if (sndbuf_getready(ch->buffer) == 0) continue; - - count = sndbuf_getready(ch->buffer); + uint32_t i_count; + + /* XXXMDC Take unsubmitted stuff into account */ + count + = i_count + = sndbuf_getready(ch->buffer) + - MOD_DIFF( + ch->unsubmittedptr, + sndbuf_getreadyptr(ch->buffer), + sndbuf_getsize(ch->buffer) + ); size = sndbuf_getsize(ch->buffer); - readyptr = sndbuf_getreadyptr(ch->buffer); + readyptr = ch->unsubmittedptr; + int size_changed=0; + unsigned int available; BCM2835_AUDIO_LOCK(sc); - if (readyptr + count > size) + if(size != ch->log_vars.bsize){ + ch->log_vars.bsize = size; + size_changed = 1; + } + available = ch->available_space; + /* + * XXXMDC + * + * On arm64, got into situations where + * readyptr was less than a packet away + * from the end of the buffer, which led + * to count being set to 0 and, inexorably, starvation.
+ * Code below tries to take that into account. + * The problem might have been fixed with some of the + * other changes that were made in the meantime, + * but for now this works fine. + */ + if (readyptr + count > size){ count = size - readyptr; - count = min(count, ch->available_space); - count -= (count % VCHIQ_AUDIO_PACKET_SIZE); + } + if(count > ch->available_space){ + count = ch->available_space; + count -= (count % VCHIQ_AUDIO_PACKET_SIZE); + }else if (count >= VCHIQ_AUDIO_PACKET_SIZE){ /* >=: exactly one ready packet must still be submitted, not zeroed below */ + count -= (count % VCHIQ_AUDIO_PACKET_SIZE); + }else if (size > count + readyptr) { + count = 0; + } BCM2835_AUDIO_UNLOCK(sc); - - if (count < VCHIQ_AUDIO_PACKET_SIZE) + if(count % VCHIQ_AUDIO_PACKET_SIZE != 0){ + WARN_THAT(sc, + "count: %u initial count: %u " + "size: %u readyptr: %u available: %u" + "\n", + count,i_count,size,readyptr, available); + } + if(size_changed) INFORM_THAT(sc,"bsize changed to %u\n",size); + + if (count == 0){ + WARN_THAT(sc, + "not enough room for a packet: count %d," + " i_count %d, rptr %d, size %d\n", + count, i_count, readyptr, size + ); continue; + } buf = (uint8_t*)sndbuf_getbuf(ch->buffer) + readyptr; @@ -560,8 +727,17 @@ ch->unsubmittedptr = (ch->unsubmittedptr + count) % sndbuf_getsize(ch->buffer); ch->available_space -= count; ch->submitted_samples += count; + uintmax_t sub = count; /* uintmax_t/%ju: the 64-bit totals would truncate in a long on 32-bit arm */ + uintmax_t sub_total = ch->submitted_samples; + uintmax_t retd = ch->retrieved_samples; KASSERT(ch->available_space >= 0, ("ch->available_space == %d\n", ch->available_space)); BCM2835_AUDIO_UNLOCK(sc); + + TRACE(sc, + "submitted %ju for a total of %ju having been acked %ju; " + "rptr %d, had %u available \n", + sub, sub_total, retd, readyptr, available); + } BCM2835_AUDIO_LOCK(sc); @@ -580,7 +756,9 @@ sc->worker_state = WORKER_RUNNING; if (kproc_create(bcm2835_audio_worker, (void*)sc, &newp, 0, 0, "bcm2835_audio_worker") != 0) { - printf("failed to create bcm2835_audio_worker\n"); + REPORT_ERROR(sc, + "failed to create bcm2835_audio_worker\n" + ); } } @@ -613,6 +791,8 @@
return NULL; } + ch->log_vars = DEFAULT_LOG_VALUES; + BCM2835_AUDIO_LOCK(sc); bcm2835_worker_update_params(sc); BCM2835_AUDIO_UNLOCK(sc); @@ -833,6 +1013,9 @@ SYSCTL_ADD_INT(ctx, tree, OID_AUTO, "starved", CTLFLAG_RD, &sc->pch.starved, sc->pch.starved, "number of starved conditions"); + SYSCTL_ADD_INT(ctx, tree, OID_AUTO, "trace", + CTLFLAG_RW, &sc->verbose_trace, + sc->verbose_trace, "enable tracing of transfers"); } static void @@ -864,6 +1047,7 @@ bcm2835_audio_open(sc); sc->volume = 75; sc->dest = DEST_AUTO; + sc->verbose_trace = 0; if (mixer_init(sc->dev, &bcmmixer_class, sc)) { device_printf(sc->dev, "mixer_init failed\n"); Index: sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h =================================================================== --- sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h +++ sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h @@ -114,8 +114,8 @@ typedef struct { uint32_t count; /* in bytes */ - void *callback; - void *cookie; + uint32_t callback; + uint32_t cookie; uint16_t silence; uint16_t max_packet; } VC_AUDIO_WRITE_T; @@ -131,8 +131,8 @@ typedef struct { int32_t count; /* Success value */ - void *callback; - void *cookie; + uint32_t callback; + uint32_t cookie; } VC_AUDIO_COMPLETE_T; /* Message header for all messages in HOST->VC direction */ Index: sys/arm64/conf/GENERIC-VCHIQ =================================================================== --- /dev/null +++ sys/arm64/conf/GENERIC-VCHIQ @@ -0,0 +1,23 @@ +# +# GENERIC-VCHIQ +# +# Custom kernel for arm64 plus VCHIQ +# +# $FreeBSD$ + +#NO_UNIVERSE + +include GENERIC +ident GENERIC-VCHIQ + +device vchiq + +# If you want to have any chance of compiling this in a RPI Zero 2 +# uncomment the stuff below + +# nomakeoptions DEBUG +# nomakeoptions WITH_CTF +# nooptions DDB_CTF +# makeoptions MALLOC_PRODUCTION=1 + + Index: sys/contrib/vchiq/interface/compat/vchi_bsd.c =================================================================== --- sys/contrib/vchiq/interface/compat/vchi_bsd.c 
+++ sys/contrib/vchiq/interface/compat/vchi_bsd.c @@ -341,7 +341,6 @@ int ret ; ret = 0; - mtx_lock(&s->mtx); while (s->value == 0) { @@ -349,13 +348,11 @@ ret = cv_wait_sig(&s->cv, &s->mtx); s->waiters--; - if (ret == EINTR) { + /* XXXMDC As per its semaphore.c, linux can only return EINTR */ + if (ret) { mtx_unlock(&s->mtx); - return (-EINTR); + return -EINTR; } - - if (ret == ERESTART) - continue; } s->value--; @@ -442,8 +439,7 @@ int fatal_signal_pending(VCHIQ_THREAD_T thr) { - printf("Implement ME: %s\n", __func__); - return (0); + return (curproc_sigkilled()); } /* Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c @@ -65,9 +65,24 @@ #define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2) +/* + * XXXMDC + * Do this less ad-hoc-y -- e.g. + * https://github.com/raspberrypi/linux/commit/c683db8860a80562a2bb5b451d77b3e471d24f36 + */ +#if defined(__aarch64__) +int g_cache_line_size = 64; +#else int g_cache_line_size = 32; +#endif static int g_fragment_size; +unsigned int g_long_bulk_space = 0; +#define VM_PAGE_TO_VC_BULK_PAGE(x) (\ + g_long_bulk_space ? VM_PAGE_TO_PHYS(x)\ + : PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(x))\ +) + typedef struct vchiq_2835_state_struct { int inited; VCHIQ_ARM_STATE_T arm_state; @@ -113,6 +128,59 @@ *addr = PHYS_TO_VCBUS(segs[0].ds_addr); } +#if defined(__aarch64__) /* See comment in free_pagelist */ +static int +invalidate_cachelines_in_range_of_ppage( + vm_page_t p, + size_t offset, + size_t count +) +{ + if(offset + count > PAGE_SIZE){ return EINVAL; } + uint8_t *dst = (uint8_t*)pmap_quick_enter_page(p); + if (!dst){ + return ENOMEM; + } + cpu_dcache_inv_range((vm_offset_t)dst + offset, count); + pmap_quick_remove_page((vm_offset_t)dst); + return 0; +} + +/* XXXMDC bulk instead of loading and invalidating single pages? 
*/ +static void +invalidate_cachelines_in_range_of_ppage_seq( + vm_page_t *p, + size_t start, + size_t count +) +{ + if(start >= PAGE_SIZE) goto invalid_input; + +#define _NEXT_AT(x,_m) (((x)+((_m)-1)) & ~((_m)-1)) /* for power of two m */ + size_t offset = _NEXT_AT(start,g_cache_line_size); +#undef _NEXT_AT + count = (offset < start + count) ? count - (offset - start) : 0; + offset = offset & (PAGE_SIZE - 1); + for( + size_t done = 0; + count > done; + p++, done += PAGE_SIZE - offset, offset = 0 + ){ + size_t in_page = PAGE_SIZE - offset; + size_t todo = (count-done > in_page) ? in_page : count-done; + int e = invalidate_cachelines_in_range_of_ppage(*p, offset, todo); + if(e != 0) + goto problem_in_loop; + } + return; + +problem_in_loop: +invalid_input: + WARN_ON(1); + return; +} +#endif + static int copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size) { @@ -171,7 +239,7 @@ goto failed_load; } - WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0); + WARN_ON(((size_t)g_slot_mem & (PAGE_SIZE - 1)) != 0); vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size); if (!vchiq_slot_zero) { @@ -204,8 +272,8 @@ bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys); vchiq_log_info(vchiq_arm_log_level, - "vchiq_init - done (slots %x, phys %x)", - (unsigned int)vchiq_slot_zero, g_slot_phys); + "vchiq_init - done (slots %zx, phys %zx)", + (size_t)vchiq_slot_zero, g_slot_phys); vchiq_call_connected_callbacks(); @@ -393,13 +461,14 @@ ** from increased speed as a result. 
*/ + static int create_pagelist(char __user *buf, size_t count, unsigned short type, struct proc *p, BULKINFO_T *bi) { PAGELIST_T *pagelist; vm_page_t* pages; - unsigned long *addrs; + uint32_t *addrs; unsigned int num_pages, i; vm_offset_t offset; int pagelist_size; @@ -436,7 +505,7 @@ err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist, BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map); - if (err) { + if (err || !pagelist) { vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory"); err = -ENOMEM; goto failed_alloc; @@ -449,14 +518,12 @@ if (err) { vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory"); err = -ENOMEM; + bi->pagelist = pagelist; goto failed_load; } vchiq_log_trace(vchiq_arm_log_level, - "create_pagelist - %x (%d bytes @%p)", (unsigned int)pagelist, count, buf); - - if (!pagelist) - return -ENOMEM; + "create_pagelist - %zx (%zu bytes @%p)", (size_t)pagelist, count, buf); addrs = pagelist->addrs; pages = (vm_page_t*)(addrs + num_pages); @@ -467,8 +534,9 @@ if (actual_pages != num_pages) { vm_page_unhold_pages(pages, actual_pages); - free(pagelist, M_VCPAGELIST); - return (-ENOMEM); + err = -ENOMEM; + bi->pagelist = pagelist; + goto failed_hold; } pagelist->length = count; @@ -477,27 +545,28 @@ /* Group the pages into runs of contiguous pages */ - base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0])); + size_t run_ceil = g_long_bulk_space ? 0x100 : PAGE_SIZE; + unsigned int pg_addr_rshift = g_long_bulk_space ? 
4 : 0; + base_addr = (void *) VM_PAGE_TO_VC_BULK_PAGE(pages[0]); next_addr = base_addr + PAGE_SIZE; addridx = 0; run = 0; - +#define _PG_BLOCK(base,run) \ + ((((size_t) (base)) >> pg_addr_rshift) & ~(run_ceil-1)) + (run) for (i = 1; i < num_pages; i++) { - addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i])); - if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) { + addr = (void *)VM_PAGE_TO_VC_BULK_PAGE(pages[i]); + if ((addr == next_addr) && (run < run_ceil - 1)) { next_addr += PAGE_SIZE; run++; } else { - addrs[addridx] = (unsigned long)base_addr + run; - addridx++; + addrs[addridx++] = (uint32_t) _PG_BLOCK(base_addr,run); base_addr = addr; next_addr = addr + PAGE_SIZE; run = 0; } } - - addrs[addridx] = (unsigned long)base_addr + run; - addridx++; + addrs[addridx++] = _PG_BLOCK(base_addr, run); +#undef _PG_BLOCK /* Partial cache lines (fragments) require special measures */ if ((type == PAGELIST_READ) && @@ -519,12 +588,24 @@ g_free_fragments = *(char **) g_free_fragments; up(&g_free_fragments_mutex); pagelist->type = - PAGELIST_READ_WITH_FRAGMENTS + - (fragments - g_fragments_base)/g_fragment_size; + PAGELIST_READ_WITH_FRAGMENTS + + (fragments - g_fragments_base)/g_fragment_size; +#if defined(__aarch64__) + bus_dmamap_sync(bcm_slots_dma_tag, bcm_slots_dma_map, BUS_DMASYNC_PREREAD); +#endif } +#if defined(__aarch64__) + if(type == PAGELIST_READ){ + cpu_dcache_wbinv_range((vm_offset_t)buf,count); + }else{ + cpu_dcache_wb_range((vm_offset_t)buf,count); + } + dsb(sy); +#else pa = pmap_extract(PCPU_GET(curpmap), (vm_offset_t)buf); dcache_wbinv_poc((vm_offset_t)buf, pa, count); +#endif bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, BUS_DMASYNC_PREWRITE); @@ -532,6 +613,8 @@ return 0; +failed_hold: + bus_dmamap_unload(bi->pagelist_dma_tag,bi->pagelist_dma_map); failed_load: bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); failed_alloc: @@ -550,7 +633,7 @@ pagelist = bi->pagelist; vchiq_log_trace(vchiq_arm_log_level, - 
"free_pagelist - %x, %d (%lu bytes @%p)", (unsigned int)pagelist, actual, pagelist->length, bi->buf); + "free_pagelist - %zx, %d (%u bytes @%p)", (size_t)pagelist, actual, pagelist->length, bi->buf); num_pages = (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / @@ -558,6 +641,27 @@ pages = (vm_page_t*)(pagelist->addrs + num_pages); +#if defined(__aarch64__) + /* + * On arm64, even if the user keeps their end of the bargain + * -- do NOT touch the buffers sent to VC -- but reads around the + * pagelist after the invalidation above, the arm might preemptively + * load (and validate) cache lines for areas inside the page list, + * so we must invalidate them again. + * + * The functional test does it and without this it doesn't pass. + * + * XXXMDC might it be enough to invalidate a couple of pages at + * the ends of the page list? + */ + if(pagelist->type >= PAGELIST_READ && actual > 0) + invalidate_cachelines_in_range_of_ppage_seq( + pages, + pagelist->offset, + actual + ); +#endif + /* Deal with any partial cache lines (fragments) */ if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { char *fragments = g_fragments_base + @@ -594,13 +698,18 @@ up(&g_free_fragments_sema); } - for (i = 0; i < num_pages; i++) { - if (pagelist->type != PAGELIST_WRITE) { + if (pagelist->type != PAGELIST_WRITE) { + for (i = 0; i < num_pages; i++) { vm_page_dirty(pages[i]); pagelist_page_free(pages[i]); } } +#if defined(__aarch64__) + /* XXXMDC necessary? 
*/ + dsb(sy); +#endif + bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map); bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); bus_dma_tag_destroy(bi->pagelist_dma_tag); Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c @@ -386,7 +386,7 @@ user_service_free(void *userdata) { USER_SERVICE_T *user_service = userdata; - + _sema_destroy(&user_service->insert_event); _sema_destroy(&user_service->remove_event); @@ -410,7 +410,7 @@ /* Wake the user-thread blocked in close_ or remove_service */ up(&user_service->close_event); - + user_service->close_pending = 0; } } @@ -442,8 +442,8 @@ #define _IOC_TYPE(x) IOCGROUP(x) vchiq_log_trace(vchiq_arm_log_level, - "vchiq_ioctl - instance %x, cmd %s, arg %p", - (unsigned int)instance, + "vchiq_ioctl - instance %zx, cmd %s, arg %p", + (size_t)instance, ((_IOC_TYPE(cmd) == VCHIQ_IOC_MAGIC) && (_IOC_NR(cmd) <= VCHIQ_IOC_MAX)) ? 
ioctl_names[_IOC_NR(cmd)] : "", arg); @@ -745,10 +745,11 @@ break; } vchiq_log_info(vchiq_arm_log_level, - "found bulk_waiter %x for pid %d", - (unsigned int)waiter, current->p_pid); + "found bulk_waiter %zx for pid %d", + (size_t)waiter, current->p_pid); args.userdata = &waiter->bulk_waiter; } + status = vchiq_bulk_transfer (args.handle, VCHI_MEM_HANDLE_INVALID, @@ -776,8 +777,8 @@ list_add(&waiter->list, &instance->bulk_waiter_list); lmutex_unlock(&instance->bulk_waiter_list_mutex); vchiq_log_info(vchiq_arm_log_level, - "saved bulk_waiter %x for pid %d", - (unsigned int)waiter, current->p_pid); + "saved bulk_waiter %zx for pid %d", + (size_t)waiter, current->p_pid); memcpy((void *) &(((VCHIQ_QUEUE_BULK_TRANSFER_T *) @@ -860,9 +861,9 @@ if (args.msgbufsize < msglen) { vchiq_log_error( vchiq_arm_log_level, - "header %x: msgbufsize" + "header %zx: msgbufsize" " %x < msglen %x", - (unsigned int)header, + (size_t)header, args.msgbufsize, msglen); WARN(1, "invalid message " @@ -1031,8 +1032,8 @@ ret = -EFAULT; } else { vchiq_log_error(vchiq_arm_log_level, - "header %x: bufsize %x < size %x", - (unsigned int)header, args.bufsize, + "header %zx: bufsize %x < size %x", + (size_t)header, args.bufsize, header->size); WARN(1, "invalid size\n"); ret = -EMSGSIZE; @@ -1093,7 +1094,7 @@ } break; case VCHIQ_IOC_LIB_VERSION: { - unsigned int lib_version = (unsigned int)arg; + size_t lib_version = (size_t)arg; if (lib_version < VCHIQ_VERSION_MIN) ret = -EINVAL; @@ -1155,18 +1156,14 @@ return ret; } -static void -instance_dtr(void *data) -{ - kfree(data); -} /**************************************************************************** * * vchiq_open * ***************************************************************************/ +static void instance_dtr(void *data); static int vchiq_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td) @@ -1206,7 +1203,7 @@ INIT_LIST_HEAD(&instance->bulk_waiter_list); devfs_set_cdevpriv(instance, instance_dtr); - } + } else { 
vchiq_log_error(vchiq_arm_log_level, "Unknown minor device"); @@ -1222,143 +1219,151 @@ * ***************************************************************************/ + static int -vchiq_close(struct cdev *dev, int flags __unused, int fmt __unused, - struct thread *td) +_vchiq_close_instance(VCHIQ_INSTANCE_T instance) { int ret = 0; - if (1) { - VCHIQ_INSTANCE_T instance; - VCHIQ_STATE_T *state = vchiq_get_state(); - VCHIQ_SERVICE_T *service; - int i; - - if ((ret = devfs_get_cdevpriv((void**)&instance))) { - printf("devfs_get_cdevpriv failed: error %d\n", ret); - return (ret); - } - - vchiq_log_info(vchiq_arm_log_level, - "vchiq_release: instance=%lx", - (unsigned long)instance); - - if (!state) { - ret = -EPERM; - goto out; - } + VCHIQ_STATE_T *state = vchiq_get_state(); + VCHIQ_SERVICE_T *service; + int i; - /* Ensure videocore is awake to allow termination. */ - vchiq_use_internal(instance->state, NULL, - USE_TYPE_VCHIQ); + vchiq_log_info(vchiq_arm_log_level, + "vchiq_release: instance=%lx", + (unsigned long)instance); - lmutex_lock(&instance->completion_mutex); + if (!state) { + ret = -EPERM; + goto out; + } - /* Wake the completion thread and ask it to exit */ - instance->closing = 1; - up(&instance->insert_event); + /* Ensure videocore is awake to allow termination. */ + vchiq_use_internal(instance->state, NULL, + USE_TYPE_VCHIQ); - lmutex_unlock(&instance->completion_mutex); + lmutex_lock(&instance->completion_mutex); - /* Wake the slot handler if the completion queue is full. */ - up(&instance->remove_event); + /* Wake the completion thread and ask it to exit */ + instance->closing = 1; + up(&instance->insert_event); - /* Mark all services for termination... */ - i = 0; - while ((service = next_service_by_instance(state, instance, - &i)) != NULL) { - USER_SERVICE_T *user_service = service->base.userdata; + lmutex_unlock(&instance->completion_mutex); - /* Wake the slot handler if the msg queue is full. 
*/ - up(&user_service->remove_event); + /* Wake the slot handler if the completion queue is full. */ + up(&instance->remove_event); - vchiq_terminate_service_internal(service); - unlock_service(service); - } + /* Mark all services for termination... */ + i = 0; + while ((service = next_service_by_instance(state, instance, + &i)) != NULL) { + USER_SERVICE_T *user_service = service->base.userdata; - /* ...and wait for them to die */ - i = 0; - while ((service = next_service_by_instance(state, instance, &i)) - != NULL) { - USER_SERVICE_T *user_service = service->base.userdata; + /* Wake the slot handler if the msg queue is full. */ + up(&user_service->remove_event); - down(&service->remove_event); + vchiq_terminate_service_internal(service); + unlock_service(service); + } - BUG_ON(service->srvstate != VCHIQ_SRVSTATE_FREE); + /* ...and wait for them to die */ + i = 0; + while ((service = next_service_by_instance(state, instance, &i)) + != NULL) { + USER_SERVICE_T *user_service = service->base.userdata; - spin_lock(&msg_queue_spinlock); + down(&service->remove_event); - while (user_service->msg_remove != - user_service->msg_insert) { - VCHIQ_HEADER_T *header = user_service-> - msg_queue[user_service->msg_remove & - (MSG_QUEUE_SIZE - 1)]; - user_service->msg_remove++; - spin_unlock(&msg_queue_spinlock); + BUG_ON(service->srvstate != VCHIQ_SRVSTATE_FREE); - if (header) - vchiq_release_message( - service->handle, - header); - spin_lock(&msg_queue_spinlock); - } + spin_lock(&msg_queue_spinlock); + while (user_service->msg_remove != + user_service->msg_insert) { + VCHIQ_HEADER_T *header = user_service-> + msg_queue[user_service->msg_remove & + (MSG_QUEUE_SIZE - 1)]; + user_service->msg_remove++; spin_unlock(&msg_queue_spinlock); - unlock_service(service); + if (header) + vchiq_release_message( + service->handle, + header); + spin_lock(&msg_queue_spinlock); } - /* Release any closed services */ - while (instance->completion_remove != - instance->completion_insert) { - 
VCHIQ_COMPLETION_DATA_T *completion; - VCHIQ_SERVICE_T *service1; - completion = &instance->completions[ - instance->completion_remove & - (MAX_COMPLETIONS - 1)]; - service1 = completion->service_userdata; - if (completion->reason == VCHIQ_SERVICE_CLOSED) - { - USER_SERVICE_T *user_service = - service->base.userdata; - - /* Wake any blocked user-thread */ - if (instance->use_close_delivered) - up(&user_service->close_event); - unlock_service(service1); - } - instance->completion_remove++; - } + spin_unlock(&msg_queue_spinlock); - /* Release the PEER service count. */ - vchiq_release_internal(instance->state, NULL); + unlock_service(service); + } + /* Release any closed services */ + while (instance->completion_remove != + instance->completion_insert) { + VCHIQ_COMPLETION_DATA_T *completion; + VCHIQ_SERVICE_T *service; + completion = &instance->completions[ + instance->completion_remove & + (MAX_COMPLETIONS - 1)]; + service = completion->service_userdata; + if (completion->reason == VCHIQ_SERVICE_CLOSED) { - struct list_head *pos, *next; - list_for_each_safe(pos, next, - &instance->bulk_waiter_list) { - struct bulk_waiter_node *waiter; - waiter = list_entry(pos, - struct bulk_waiter_node, - list); - list_del(pos); - vchiq_log_info(vchiq_arm_log_level, - "bulk_waiter - cleaned up %x " - "for pid %d", - (unsigned int)waiter, waiter->pid); - _sema_destroy(&waiter->bulk_waiter.event); - kfree(waiter); - } - } + USER_SERVICE_T *user_service = + service->base.userdata; + /* Wake any blocked user-thread */ + if (instance->use_close_delivered) + up(&user_service->close_event); + + unlock_service(service); + } + instance->completion_remove++; } - else { - vchiq_log_error(vchiq_arm_log_level, - "Unknown minor device"); - ret = -ENXIO; + + /* Release the PEER service count. 
*/ + vchiq_release_internal(instance->state, NULL); + + { + struct list_head *pos, *next; + list_for_each_safe(pos, next, + &instance->bulk_waiter_list) { + struct bulk_waiter_node *waiter; + waiter = list_entry(pos, + struct bulk_waiter_node, + list); + list_del(pos); + vchiq_log_info(vchiq_arm_log_level, + "bulk_waiter - cleaned up %zx " + "for pid %d", + (size_t)waiter, waiter->pid); + _sema_destroy(&waiter->bulk_waiter.event); + kfree(waiter); + } } out: return ret; + +} + +static void +instance_dtr(void *data) +{ + VCHIQ_INSTANCE_T instance = data; + _vchiq_close_instance(instance); + kfree(data); +} + +static int +vchiq_close(struct cdev *dev, int flags __unused, int fmt __unused, + struct thread *td) +{ + + /* XXXMDC it's privdata that tracks opens */ + /* XXXMDC only get closes when there are no more open fds on a vnode */ + + return(0); + } /**************************************************************************** @@ -1435,9 +1440,9 @@ instance = service->instance; if (instance && !instance->mark) { len = snprintf(buf, sizeof(buf), - "Instance %x: pid %d,%s completions " + "Instance %zx: pid %d,%s completions " "%d/%d", - (unsigned int)instance, instance->pid, + (size_t)instance, instance->pid, instance->connected ? 
" connected, " : "", instance->completion_insert - @@ -1465,8 +1470,8 @@ char buf[80]; int len; - len = snprintf(buf, sizeof(buf), " instance %x", - (unsigned int)service->instance); + len = snprintf(buf, sizeof(buf), " instance %zx", + (size_t)service->instance); if ((service->base.callback == service_callback) && user_service->is_vchi) { Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.h =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.h +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.h @@ -184,12 +184,21 @@ #if VCHIQ_ENABLE_DEBUG #define DEBUG_INITIALISE(local) int *debug_ptr = (local)->debug; +#if defined(__aarch64__) +#define DEBUG_TRACE(d) \ + do { debug_ptr[DEBUG_ ## d] = __LINE__; dsb(sy); } while (0) +#define DEBUG_VALUE(d, v) \ + do { debug_ptr[DEBUG_ ## d] = (v); dsb(sy); } while (0) +#define DEBUG_COUNT(d) \ + do { debug_ptr[DEBUG_ ## d]++; dsb(sy); } while (0) +#else #define DEBUG_TRACE(d) \ do { debug_ptr[DEBUG_ ## d] = __LINE__; dsb(); } while (0) #define DEBUG_VALUE(d, v) \ do { debug_ptr[DEBUG_ ## d] = (v); dsb(); } while (0) #define DEBUG_COUNT(d) \ do { debug_ptr[DEBUG_ ## d]++; dsb(); } while (0) +#endif #else /* VCHIQ_ENABLE_DEBUG */ @@ -265,7 +274,7 @@ typedef struct remote_event_struct { int armed; int fired; - struct semaphore *event; + uint32_t event; } REMOTE_EVENT_T; typedef struct opaque_platform_state_t *VCHIQ_PLATFORM_STATE_T; Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c @@ -417,13 +417,15 @@ vchiq_platform_conn_state_changed(state, oldstate, newstate); } +#define ACTUAL_EVENT_SEM_ADDR(ref,offset)\ + ((struct semaphore *)(((size_t) ref) + ((size_t) offset))) static inline void -remote_event_create(REMOTE_EVENT_T *event) +remote_event_create(VCHIQ_STATE_T *ref, 
REMOTE_EVENT_T *event) { event->armed = 0; /* Don't clear the 'fired' flag because it may already have been set ** by the other side. */ - _sema_init(event->event, 0); + _sema_init(ACTUAL_EVENT_SEM_ADDR(ref,event->event), 0); } __unused static inline void @@ -433,13 +435,18 @@ } static inline int -remote_event_wait(REMOTE_EVENT_T *event) +remote_event_wait(VCHIQ_STATE_T *ref, REMOTE_EVENT_T *event) { if (!event->fired) { event->armed = 1; +#if defined(__aarch64__) + dsb(sy); +#else dsb(); +#endif + if (!event->fired) { - if (down_interruptible(event->event) != 0) { + if (down_interruptible(ACTUAL_EVENT_SEM_ADDR(ref,event->event)) != 0) { event->armed = 0; return 0; } @@ -453,26 +460,32 @@ } static inline void -remote_event_signal_local(REMOTE_EVENT_T *event) +remote_event_signal_local(VCHIQ_STATE_T *ref, REMOTE_EVENT_T *event) { +/* + * Mirror + * https://github.com/raspberrypi/linux/commit/a50c4c9a65779ca835746b5fd79d3d5278afbdbe + * for extra safety + */ + event->fired = 1; event->armed = 0; - up(event->event); + up(ACTUAL_EVENT_SEM_ADDR(ref,event->event)); } static inline void -remote_event_poll(REMOTE_EVENT_T *event) +remote_event_poll(VCHIQ_STATE_T *ref, REMOTE_EVENT_T *event) { if (event->fired && event->armed) - remote_event_signal_local(event); + remote_event_signal_local(ref,event); } void remote_event_pollall(VCHIQ_STATE_T *state) { - remote_event_poll(&state->local->sync_trigger); - remote_event_poll(&state->local->sync_release); - remote_event_poll(&state->local->trigger); - remote_event_poll(&state->local->recycle); + remote_event_poll(state , &state->local->sync_trigger); + remote_event_poll(state , &state->local->sync_release); + remote_event_poll(state , &state->local->trigger); + remote_event_poll(state , &state->local->recycle); } /* Round up message sizes so that any space at the end of a slot is always big @@ -553,7 +566,7 @@ wmb(); /* ... and ensure the slot handler runs. 
*/ - remote_event_signal_local(&state->local->trigger); + remote_event_signal_local(state, &state->local->trigger); } /* Called from queue_message, by the slot handler and application threads, @@ -1016,7 +1029,7 @@ (lmutex_lock_interruptible(&state->sync_mutex) != 0)) return VCHIQ_RETRY; - remote_event_wait(&local->sync_release); + remote_event_wait(state, &local->sync_release); rmb(); @@ -1097,9 +1110,6 @@ size); } - /* Make sure the new header is visible to the peer. */ - wmb(); - remote_event_signal(&state->remote->sync_trigger); if (VCHIQ_MSG_TYPE(msgid) != VCHIQ_MSG_PAUSE) @@ -1824,8 +1834,17 @@ state->slot_data)->version; up(&state->connect); break; +/* + * XXXMDC Apparently nothing uses this + * https://github.com/raspberrypi/linux/commit/14f4d72fb799a9b3170a45ab80d4a3ddad541960 + * but taking out the master bits is a whole new job + */ case VCHIQ_MSG_BULK_RX: - case VCHIQ_MSG_BULK_TX: { + case VCHIQ_MSG_BULK_TX: + WARN_ON(1); + break; +#if 0 + { VCHIQ_BULK_QUEUE_T *queue; WARN_ON(!state->is_master); queue = (type == VCHIQ_MSG_BULK_RX) ? 
@@ -1887,9 +1906,11 @@ lmutex_unlock(&service->bulk_mutex); if (resolved) notify_bulks(service, queue, - 1/*retry_poll*/); + 1//retry_poll + ); } - } break; + } +#endif case VCHIQ_MSG_BULK_RX_DONE: case VCHIQ_MSG_BULK_TX_DONE: WARN_ON(state->is_master); @@ -2050,7 +2071,7 @@ while (1) { DEBUG_COUNT(SLOT_HANDLER_COUNT); DEBUG_TRACE(SLOT_HANDLER_LINE); - remote_event_wait(&local->trigger); + remote_event_wait(state, &local->trigger); rmb(); @@ -2140,8 +2161,7 @@ VCHIQ_SHARED_STATE_T *local = state->local; while (1) { - remote_event_wait(&local->recycle); - + remote_event_wait(state, &local->recycle); process_free_queue(state); } return 0; @@ -2164,7 +2184,7 @@ int type; unsigned int localport, remoteport; - remote_event_wait(&local->sync_trigger); + remote_event_wait(state, &local->sync_trigger); rmb(); @@ -2281,7 +2301,7 @@ VCHIQ_SLOT_ZERO_T * vchiq_init_slots(void *mem_base, int mem_size) { - int mem_align = (VCHIQ_SLOT_SIZE - (int)mem_base) & VCHIQ_SLOT_MASK; + int mem_align = (int)((VCHIQ_SLOT_SIZE - (long)mem_base) & VCHIQ_SLOT_MASK); VCHIQ_SLOT_ZERO_T *slot_zero = (VCHIQ_SLOT_ZERO_T *)((char *)mem_base + mem_align); int num_slots = (mem_size - mem_align)/VCHIQ_SLOT_SIZE; @@ -2477,24 +2497,24 @@ state->data_use_count = 0; state->data_quota = state->slot_queue_available - 1; - local->trigger.event = &state->trigger_event; - remote_event_create(&local->trigger); + local->trigger.event = offsetof(VCHIQ_STATE_T, trigger_event); + remote_event_create(state, &local->trigger); local->tx_pos = 0; - local->recycle.event = &state->recycle_event; - remote_event_create(&local->recycle); + local->recycle.event = offsetof(VCHIQ_STATE_T, recycle_event); + remote_event_create(state, &local->recycle); local->slot_queue_recycle = state->slot_queue_available; - local->sync_trigger.event = &state->sync_trigger_event; - remote_event_create(&local->sync_trigger); + local->sync_trigger.event = offsetof(VCHIQ_STATE_T, sync_trigger_event); + remote_event_create(state, 
&local->sync_trigger); - local->sync_release.event = &state->sync_release_event; - remote_event_create(&local->sync_release); + local->sync_release.event = offsetof(VCHIQ_STATE_T, sync_release_event); + remote_event_create(state, &local->sync_release); /* At start-of-day, the slot is empty and available */ ((VCHIQ_HEADER_T *)SLOT_DATA_FROM_INDEX(state, local->slot_sync))->msgid = VCHIQ_MSGID_PADDING; - remote_event_signal_local(&local->sync_release); + remote_event_signal_local(state, &local->sync_release); local->debug[DEBUG_ENTRIES] = DEBUG_MAX; @@ -3381,7 +3401,7 @@ (dir == VCHIQ_BULK_TRANSMIT) ? VCHIQ_POLL_TXNOTIFY : VCHIQ_POLL_RXNOTIFY); } else { - int payload[2] = { (int)bulk->data, bulk->size }; + uint32_t payload[2] = { (uint32_t)(uintptr_t)bulk->data, bulk->size }; VCHIQ_ELEMENT_T element = { payload, sizeof(payload) }; status = queue_message(state, NULL, @@ -3525,7 +3545,6 @@ release_message_sync(VCHIQ_STATE_T *state, VCHIQ_HEADER_T *header) { header->msgid = VCHIQ_MSGID_PADDING; - wmb(); remote_event_signal(&state->remote->sync_release); } Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c @@ -47,7 +47,11 @@ #include #include +/* XXXMDC Is this necessary at all? 
*/ +#if defined(__aarch64__) +#else #include +#endif #include "vchiq_arm.h" #include "vchiq_2835.h" @@ -78,13 +82,31 @@ static struct bcm_vchiq_softc *bcm_vchiq_sc = NULL; -#define BSD_DTB 1 -#define UPSTREAM_DTB 2 + +#define CONFIG_INVALID 0 +#define CONFIG_VALID 1 << 0 +#define BSD_REG_ADDRS 1 << 1 +#define LONG_BULK_SPACE 1 << 2 + +/* + * Also controls the use of the standard VC address offset for bulk data DMA + * (normal bulks use that offset; bulks for long address spaces use physical + * page addresses) + */ +extern unsigned int g_long_bulk_space; + + +/* + * XXXMDC + * The man page for ofw_bus_is_compatible describes ``features'' + * as ``can be used''. Here we understand them as ``must be used'' + */ + static struct ofw_compat_data compat_data[] = { - {"broadcom,bcm2835-vchiq", BSD_DTB}, - {"brcm,bcm2835-vchiq", UPSTREAM_DTB}, - {"brcm,bcm2711-vchiq", UPSTREAM_DTB}, - {NULL, 0} + {"broadcom,bcm2835-vchiq", BSD_REG_ADDRS | CONFIG_VALID}, + {"brcm,bcm2835-vchiq", CONFIG_VALID}, + {"brcm,bcm2711-vchiq", LONG_BULK_SPACE | CONFIG_VALID}, + {NULL, CONFIG_INVALID} }; #define vchiq_read_4(reg) \ @@ -119,13 +141,23 @@ void remote_event_signal(REMOTE_EVENT_T *event) { - event->fired = 1; + wmb(); + + event->fired = 1; /* The test on the next line also ensures the write on the previous line has completed */ + /* UPDATE: not on arm64, it would seem... 
*/ +#if defined(__aarch64__) + dsb(sy); +#endif if (event->armed) { /* trigger vc interrupt */ +#if defined(__aarch64__) + dsb(sy); +#else dsb(); +#endif vchiq_write_4(0x48, 0); } } @@ -134,13 +166,17 @@ bcm_vchiq_probe(device_t dev) { - if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) + if ((ofw_bus_search_compatible(dev, compat_data)->ocd_data & CONFIG_VALID) == 0) return (ENXIO); device_set_desc(dev, "BCM2835 VCHIQ"); return (BUS_PROBE_DEFAULT); } +/* debug_sysctl */ +extern int vchiq_core_log_level; +extern int vchiq_arm_log_level; + static int bcm_vchiq_attach(device_t dev) { @@ -168,14 +204,36 @@ return (ENXIO); } - if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == UPSTREAM_DTB) + uintptr_t dev_compat_d = ofw_bus_search_compatible(dev, compat_data)->ocd_data; + /* XXXMDC: shouldn't happen (checked for in probe)--but, for symmetry */ + if ((dev_compat_d & CONFIG_VALID) == 0){ + device_printf(dev, "attempting to attach using invalid config.\n"); + bus_release_resource(dev, SYS_RES_IRQ, rid, sc->irq_res); + return (EINVAL); + } + if ((dev_compat_d & BSD_REG_ADDRS) == 0) sc->regs_offset = -0x40; + if(dev_compat_d & LONG_BULK_SPACE) + g_long_bulk_space = 1; node = ofw_bus_get_node(dev); if ((OF_getencprop(node, "cache-line-size", &cell, sizeof(cell))) > 0) g_cache_line_size = cell; vchiq_core_initialize(); + + /* debug_sysctl */ + struct sysctl_ctx_list *ctx_l = device_get_sysctl_ctx(dev); + struct sysctl_oid *tree_node = device_get_sysctl_tree(dev); + struct sysctl_oid_list *tree = SYSCTL_CHILDREN(tree_node); + SYSCTL_ADD_INT( + ctx_l, tree, OID_AUTO, "log", CTLFLAG_RW, + &vchiq_core_log_level, vchiq_core_log_level, "log level" + ); + SYSCTL_ADD_INT( + ctx_l, tree, OID_AUTO, "arm_log", CTLFLAG_RW, + &vchiq_arm_log_level, vchiq_arm_log_level, "arm log level" + ); /* Setup and enable the timer */ if (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_pagelist.h 
=================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_pagelist.h +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_pagelist.h @@ -42,10 +42,10 @@ #define PAGELIST_READ_WITH_FRAGMENTS 2 typedef struct pagelist_struct { - unsigned long length; - unsigned short type; - unsigned short offset; - unsigned long addrs[1]; /* N.B. 12 LSBs hold the number of following + uint32_t length; + uint16_t type; + uint16_t offset; + uint32_t addrs[1]; /* N.B. 12 LSBs hold the number of following pages at consecutive addresses. */ } PAGELIST_T; Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_shim.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_shim.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_shim.c @@ -398,7 +398,7 @@ ***********************************************************/ int32_t vchi_held_msg_release(VCHI_HELD_MSG_T *message) { - vchiq_release_message((VCHIQ_SERVICE_HANDLE_T)message->service, + vchiq_release_message((VCHIQ_SERVICE_HANDLE_T)(size_t)message->service, (VCHIQ_HEADER_T *)message->message); return 0; @@ -444,7 +444,7 @@ *msg_size = header->size; message_handle->service = - (struct opaque_vchi_service_t *)service->handle; + (struct opaque_vchi_service_t *)(unsigned long)service->handle; message_handle->message = header; return 0;