Index: sys/arm64/coresight/coresight.h
===================================================================
--- sys/arm64/coresight/coresight.h
+++ sys/arm64/coresight/coresight.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -60,7 +60,8 @@
 
 enum cs_dev_type {
 	CORESIGHT_ETMV4,
-	CORESIGHT_TMC,
+	CORESIGHT_TMC_ETF,
+	CORESIGHT_TMC_ETR,
 	CORESIGHT_DYNAMIC_REPLICATOR,
 	CORESIGHT_FUNNEL,
 	CORESIGHT_CPU_DEBUG,
@@ -125,9 +126,10 @@
 	uint32_t low;
 	uint32_t high;
 	uint32_t bufsize;
-	uint32_t flags;
-#define	ETR_FLAG_ALLOCATE	(1 << 0)
-#define	ETR_FLAG_RELEASE	(1 << 1)
+	vm_page_t *pages;
+	int npages;
+	int curpage;
+	vm_offset_t curpage_offset;
 };
 
 struct coresight_event {
@@ -154,11 +156,14 @@
 struct coresight_platform_data *coresight_fdt_get_platform_data(device_t dev);
 struct coresight_platform_data *coresight_acpi_get_platform_data(device_t dev);
 struct endpoint * coresight_get_output_endpoint(struct coresight_platform_data *pdata);
-struct coresight_device * coresight_get_output_device(struct endpoint *endp, struct endpoint **);
+struct coresight_device * coresight_get_output_device(struct coresight_device *cs_dev, struct endpoint *endp, struct endpoint **);
 int coresight_register(struct coresight_desc *desc);
 int coresight_init_event(int cpu, struct coresight_event *event);
+void coresight_start(int cpu, struct coresight_event *event);
+void coresight_stop(int cpu, struct coresight_event *event);
 void coresight_enable(int cpu, struct coresight_event *event);
 void coresight_disable(int cpu, struct coresight_event *event);
 void coresight_read(int cpu, struct coresight_event *event);
+void coresight_dump(int cpu, struct coresight_event *event);
 
 #endif /* !_ARM64_CORESIGHT_CORESIGHT_H_ */
Index: sys/arm64/coresight/coresight.c
===================================================================
--- sys/arm64/coresight/coresight.c
+++ sys/arm64/coresight/coresight.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -42,10 +42,132 @@
 #include 
 
 #include 
+#include 
+
+#define	CORESIGHT_DEBUG
+#undef CORESIGHT_DEBUG
+
+#ifdef CORESIGHT_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
 
 static struct mtx cs_mtx;
 struct coresight_device_list cs_devs;
 
+static struct hwt_backend backend;
+static struct coresight_event cs_event[MAXCPU];
+
+/*
+ * HWT backend hook: reset the coresight event of hwt's CPU, attach the
+ * hwt trace pages to it and pass it to coresight_init_event().
+ */
+static void
+coresight_event_init(struct hwt_context *hwt)
+{
+	struct coresight_event *event;
+
+	dprintf("%s: cpu_id %d\n", __func__, hwt->cpu_id);
+
+	event = &cs_event[hwt->cpu_id];
+	/* Zeroing also clears etr.started, etr.low and etr.high. */
+	memset(event, 0, sizeof(*event));
+	event->etr.pages = hwt->pages;
+	event->etr.npages = hwt->npages;
+	event->etr.bufsize = hwt->npages * PAGE_SIZE;
+	event->excp_level = 0; /* User level; 1 is the kernel level. */
+	event->src = CORESIGHT_ETMV4;
+	event->sink = CORESIGHT_TMC_ETR;
+
+	/*
+	 * Set the trace ID required for ETM component.
+	 * TODO: this should be derived from pmctrace.
+	 */
+	event->etm.trace_id = 0x10;
+	coresight_init_event(hwt->cpu_id, event);
+}
+
+/* HWT backend hook: run coresight_start() on the event slot of hwt's CPU. */
+static void
+coresight_event_start(struct hwt_context *hwt)
+{
+	int cpu;
+
+	cpu = hwt->cpu_id;
+	dprintf("%s: cpu_id %d\n", __func__, cpu);
+
+	coresight_start(cpu, &cs_event[cpu]);
+}
+
+/* HWT backend hook: run coresight_stop() on the event slot of hwt's CPU. */
+static void
+coresight_event_stop(struct hwt_context *hwt)
+{
+	int cpu;
+
+	cpu = hwt->cpu_id;
+	coresight_stop(cpu, &cs_event[cpu]);
+}
+
+/* HWT backend hook: run coresight_enable() on the event slot of hwt's CPU. */
+static void
+coresight_event_enable(struct hwt_context *hwt)
+{
+	int cpu;
+
+	cpu = hwt->cpu_id;
+	coresight_enable(cpu, &cs_event[cpu]);
+}
+
+/* HWT backend hook: run coresight_disable() on the event of hwt's CPU. */
+static void
+coresight_event_disable(struct hwt_context *hwt)
+{
+	int cpu;
+
+	cpu = hwt->cpu_id;
+	coresight_disable(cpu, &cs_event[cpu]);
+}
+
+/* HWT backend hook: run coresight_dump() on the event slot of hwt's CPU. */
+static void
+coresight_event_dump(struct hwt_context *hwt)
+{
+	int cpu;
+
+	cpu = hwt->cpu_id;
+	coresight_dump(cpu, &cs_event[cpu]);
+}
+
+/*
+ * HWT backend hook: refresh the ETR position via coresight_read() and
+ * report the current page index and in-page offset.  Always returns 0.
+ */
+static int
+coresight_event_read(struct hwt_context *hwt, int *curpage,
+    vm_offset_t *curpage_offset)
+{
+	struct coresight_event *event;
+
+	KASSERT((u_int)hwt->cpu_id < MAXCPU, ("Bad cpu_id"));
+	event = &cs_event[hwt->cpu_id];
+	coresight_read(hwt->cpu_id, event);
+	*curpage = event->etr.curpage;
+	*curpage_offset = event->etr.curpage_offset;
+	return (0);
+}
+
+static struct hwt_backend_ops coresight_ops = {	/* HWT backend hooks. */
+	.hwt_event_init = coresight_event_init,
+	.hwt_event_start = coresight_event_start,
+	.hwt_event_stop = coresight_event_stop,
+	.hwt_event_enable = coresight_event_enable,
+	.hwt_event_disable = coresight_event_disable,
+	.hwt_event_dump = coresight_event_dump,
+	.hwt_event_read = coresight_event_read,
+};
+
 int
 coresight_register(struct coresight_desc *desc)
 {
@@ -61,6 +183,11 @@
 	TAILQ_INSERT_TAIL(&cs_devs, cs_dev, link);
 	mtx_unlock(&cs_mtx);
 
+	if (desc->dev_type == CORESIGHT_TMC_ETR) {
+		backend.ops = &coresight_ops;
+		hwt_register(&backend);
+	}
+
 	return (0);
 }
 
@@ -81,7 +208,8 @@
 }
 
 struct coresight_device *
-coresight_get_output_device(struct endpoint *endp, struct endpoint **out_endp)
+coresight_get_output_device(struct coresight_device *cs_dev0,
+    struct endpoint *endp, struct endpoint **out_endp)
 {
 	struct coresight_platform_data *pdata;
 	struct coresight_device *cs_dev;
@@ -94,7 +222,11 @@
 			case CORESIGHT_BUS_FDT:
 #ifdef FDT
 				if (endp->their_node == endp2->my_node) {
-					*out_endp = endp2;
+					*out_endp =
+					    malloc(sizeof(struct endpoint),
+						M_CORESIGHT, M_WAITOK | M_ZERO);
+					memcpy(*out_endp, endp2,
+					    sizeof(struct endpoint));
 					return (cs_dev);
 				}
 #endif
@@ -103,7 +235,11 @@
 			case CORESIGHT_BUS_ACPI:
 #ifdef DEV_ACPI
 				if (endp->their_handle == endp2->my_handle) {
-					*out_endp = endp2;
+					*out_endp =
+					    malloc(sizeof(struct endpoint),
+						M_CORESIGHT, M_WAITOK | M_ZERO);
+					memcpy(*out_endp, endp2,
+					    sizeof(struct endpoint));
 					return (cs_dev);
 				}
 #endif
Index: sys/arm64/coresight/coresight_cmd.c
===================================================================
--- sys/arm64/coresight/coresight_cmd.c
+++ sys/arm64/coresight/coresight_cmd.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -57,7 +57,7 @@
 		if (endp->input != 0)
 			continue;
 
-		out = coresight_get_output_device(endp, &out_endp);
+		out = coresight_get_output_device(cs_dev, endp, &out_endp);
 		if (out != NULL) {
 			if (LIST_EMPTY(&event->endplist)) {
 				/* Add source device */
@@ -122,9 +122,39 @@
 		CORESIGHT_INIT(cs_dev->dev);
 	}
 
+	/* Configure all devices in the path. */
+	LIST_FOREACH(endp, &event->endplist, endplink) {
+		cs_dev = endp->cs_dev;
+		CORESIGHT_CONFIGURE(cs_dev->dev, event);
+	}
+
 	return (0);
 }
 
+/*
+ * Invoke the coresight start method on each endpoint in the event's list.
+ */
+void
+coresight_start(int cpu, struct coresight_event *event)
+{
+	struct endpoint *endp;
+
+	LIST_FOREACH(endp, &event->endplist, endplink)
+		CORESIGHT_START(endp->cs_dev->dev, endp, event);
+}
+
+/*
+ * Invoke the coresight stop method on each endpoint in the event's list.
+ */
+void
+coresight_stop(int cpu, struct coresight_event *event)
+{
+	struct endpoint *endp;
+
+	LIST_FOREACH(endp, &event->endplist, endplink)
+		CORESIGHT_STOP(endp->cs_dev->dev, endp, event);
+}
+
 void
 coresight_enable(int cpu, struct coresight_event *event)
 {
@@ -149,11 +179,26 @@
 	}
 }
 
+/*
+ * Invoke the coresight dump method on each endpoint in the event's list.
+ */
+void
+coresight_dump(int cpu, struct coresight_event *event)
+{
+	struct endpoint *endp;
+
+	LIST_FOREACH(endp, &event->endplist, endplink)
+		CORESIGHT_DUMP(endp->cs_dev->dev);
+}
+
 void
 coresight_read(int cpu, struct coresight_event *event)
 {
+	struct coresight_device *cs_dev;
 	struct endpoint *endp;
 
-	LIST_FOREACH(endp, &event->endplist, endplink)
-		CORESIGHT_READ(endp->cs_dev->dev, endp, event);
+	LIST_FOREACH(endp, &event->endplist, endplink) {
+		cs_dev = endp->cs_dev;
+		CORESIGHT_READ(cs_dev->dev, endp, event);
+	}
 }
Index: sys/arm64/coresight/coresight_etm4x.h
===================================================================
--- sys/arm64/coresight/coresight_etm4x.h
+++ sys/arm64/coresight/coresight_etm4x.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by BAE Systems, the University of Cambridge
Index: sys/arm64/coresight/coresight_etm4x.c
===================================================================
--- sys/arm64/coresight/coresight_etm4x.c
+++ sys/arm64/coresight/coresight_etm4x.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by BAE Systems, the University of Cambridge
@@ -75,6 +75,8 @@
 	uint32_t reg;
 	int i;
 
+	dprintf("%s%d\n", __func__, device_get_unit(dev));
+
 	sc = device_get_softc(dev);
 
 	/* Configure ETM */
@@ -88,6 +90,7 @@
 	reg |= TRCCONFIGR_INSTP0_LDRSTR;
 	reg |= TRCCONFIGR_COND_ALL;
 	bus_write_4(sc->res, TRCCONFIGR, reg);
+	dprintf("%s: TRCCONFIGR is %x\n", __func__, reg);
 
 	/* Disable all event tracing. */
 	bus_write_4(sc->res, TRCEVENTCTL0R, 0);
@@ -102,6 +105,11 @@
 	/* Set a value for the trace ID */
 	bus_write_4(sc->res, TRCTRACEIDR, event->etm.trace_id);
 
+	dprintf("%s: IDR0 is %x\n", __func__, bus_read_4(sc->res, TRCIDR(0)));
+	dprintf("%s: IDR1 is %x\n", __func__, bus_read_4(sc->res, TRCIDR(1)));
+	dprintf("%s: IDR2 is %x\n", __func__, bus_read_4(sc->res, TRCIDR(2)));
+	dprintf("%s: IDR8 is %x\n", __func__, bus_read_4(sc->res, TRCIDR(8)));
+
 	/*
 	 * Disable the timestamp event. The trace unit still generates
 	 * timestamps due to other reasons such as trace synchronization.
@@ -191,6 +199,16 @@
 	return (0);
 }
 
+/* Coresight start method: run etm_prepare() for the event; returns 0. */
+static int
+etm_start(device_t dev, struct endpoint *endp, struct coresight_event *event)
+{
+
+	/* The result of etm_prepare() is not propagated to the caller. */
+	(void)etm_prepare(dev, event);
+	return (0);
+}
+
 static int
 etm_enable(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
@@ -200,7 +218,7 @@
 
 	sc = device_get_softc(dev);
 
-	etm_prepare(dev, event);
+	dprintf("%s%d\n", __func__, device_get_unit(dev));
 
 	/* Enable the trace unit */
 	bus_write_4(sc->res, TRCPRGCTLR, TRCPRGCTLR_EN);
@@ -225,6 +243,8 @@
 
 	sc = device_get_softc(dev);
 
+	dprintf("%s%d\n", __func__, device_get_unit(dev));
+
 	/* Disable the trace unit */
 	bus_write_4(sc->res, TRCPRGCTLR, 0);
 
@@ -258,6 +278,7 @@
 static device_method_t etm_methods[] = {
 	/* Coresight interface */
 	DEVMETHOD(coresight_init,	etm_init),
+	DEVMETHOD(coresight_start,	etm_start),
 	DEVMETHOD(coresight_enable,	etm_enable),
 	DEVMETHOD(coresight_disable,	etm_disable),
 	DEVMETHOD_END
Index: sys/arm64/coresight/coresight_fdt.c
===================================================================
--- sys/arm64/coresight/coresight_fdt.c
+++ sys/arm64/coresight/coresight_fdt.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -47,10 +48,10 @@
 #include 
 
 static int
-coresight_fdt_get_ports(phandle_t dev_node,
-    struct coresight_platform_data *pdata)
+coresight_fdt_get_ports(phandle_t dev_node, phandle_t node,
+    struct coresight_platform_data *pdata, bool input)
 {
-	phandle_t node, child;
+	phandle_t child;
 	pcell_t port_reg;
 	phandle_t xref;
 	char *name;
@@ -58,12 +59,6 @@
 	phandle_t endpoint_child;
 	struct endpoint *endp;
 
-	child = ofw_bus_find_child(dev_node, "ports");
-	if (child)
-		node = child;
-	else
-		node = dev_node;
-
 	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
 		ret = OF_getprop_alloc(child, "name", (void **)&name);
 		if (ret == -1)
@@ -89,8 +84,8 @@
 				endp->their_node = OF_node_from_xref(xref);
 				endp->dev_node = dev_node;
 				endp->reg = port_reg;
-				if (OF_getproplen(endpoint_child,
-				    "slave-mode") >= 0) {
+
+				if (input) {
 					pdata->in_ports++;
 					endp->input = 1;
 				} else
@@ -108,19 +103,27 @@
 }
 
 static int
-coresight_fdt_get_cpu(phandle_t node,
-    struct coresight_platform_data *pdata)
+coresight_fdt_get_cpu(phandle_t node, struct coresight_platform_data *pdata)
 {
+	struct pcpu *pcpu;
 	phandle_t cpu_node;
 	pcell_t xref;
-	pcell_t cpu_reg;
+	pcell_t cpu_reg[2];
+	int i;
 
 	if (OF_getencprop(node, "cpu", &xref, sizeof(xref)) != -1) {
 		cpu_node = OF_node_from_xref(xref);
 		if (OF_getencprop(cpu_node, "reg", (void *)&cpu_reg,
-			sizeof(cpu_reg)) > 0) {
-			pdata->cpu = cpu_reg;
-			return (0);
+		    sizeof(cpu_reg)) > 0) {
+			for (i = 0; i < mp_ncpus; i++) {
+				pcpu = cpuid_to_pcpu[i];
+				if (pcpu->pc_mpidr_low == cpu_reg[1] &&
+				    pcpu->pc_mpidr_high == cpu_reg[0]) {
+					pdata->cpu = pcpu->pc_cpuid;
+printf("cpuid %d\n", pdata->cpu);
+					return (0);
+				}
+			}
 		}
 	}
 
@@ -131,7 +134,7 @@
 coresight_fdt_get_platform_data(device_t dev)
 {
 	struct coresight_platform_data *pdata;
-	phandle_t node;
+	phandle_t node, child;
 
 	node = ofw_bus_get_node(dev);
 
@@ -143,7 +146,14 @@
 	TAILQ_INIT(&pdata->endpoints);
 
 	coresight_fdt_get_cpu(node, pdata);
-	coresight_fdt_get_ports(node, pdata);
+
+	child = ofw_bus_find_child(node, "in-ports");
+	if (child)
+		coresight_fdt_get_ports(node, child, pdata, true);
+
+	child = ofw_bus_find_child(node, "out-ports");
+	if (child)
+		coresight_fdt_get_ports(node, child, pdata, false);
 
 	if (bootverbose)
 		printf("Total ports: in %d out %d\n",
Index: sys/arm64/coresight/coresight_funnel.c
===================================================================
--- sys/arm64/coresight/coresight_funnel.c
+++ sys/arm64/coresight/coresight_funnel.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by BAE Systems, the University of Cambridge
@@ -76,7 +76,7 @@
 }
 
 static int
-funnel_enable(device_t dev, struct endpoint *endp,
+funnel_start(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
 {
 	struct funnel_softc *sc;
@@ -96,7 +96,7 @@
 }
 
 static void
-funnel_disable(device_t dev, struct endpoint *endp,
+funnel_stop(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
 {
 	struct funnel_softc *sc;
@@ -135,8 +135,8 @@
 static device_method_t funnel_methods[] = {
 	/* Coresight interface */
 	DEVMETHOD(coresight_init,	funnel_init),
-	DEVMETHOD(coresight_enable,	funnel_enable),
-	DEVMETHOD(coresight_disable,	funnel_disable),
+	DEVMETHOD(coresight_start,	funnel_start),
+	DEVMETHOD(coresight_stop,	funnel_stop),
 	DEVMETHOD_END
 };
 
Index: sys/arm64/coresight/coresight_funnel_fdt.c
===================================================================
--- sys/arm64/coresight/coresight_funnel_fdt.c
+++ sys/arm64/coresight/coresight_funnel_fdt.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by BAE Systems, the University of Cambridge
@@ -49,7 +49,7 @@
 #include "coresight_if.h"
 
 static struct ofw_compat_data compat_data[] = {
-	{ "arm,coresight-funnel",		HWTYPE_FUNNEL },
+	{ "arm,coresight-dynamic-funnel",	HWTYPE_FUNNEL },
 	{ "arm,coresight-static-funnel",	HWTYPE_STATIC_FUNNEL },
 	{ NULL,					HWTYPE_NONE }
 };
Index: sys/arm64/coresight/coresight_if.m
===================================================================
--- sys/arm64/coresight/coresight_if.m
+++ sys/arm64/coresight/coresight_if.m
@@ -1,5 +1,5 @@
 #-
-# Copyright (c) 2018 Ruslan Bukin 
+# Copyright (c) 2018-2023 Ruslan Bukin 
 # All rights reserved.
 #
 # This software was developed by SRI International and the University of
@@ -39,6 +39,23 @@
 	device_t dev;
 };
 
+METHOD int configure {
+	device_t dev;
+	struct coresight_event *event;
+};
+
+METHOD int start {
+	device_t dev;
+	struct endpoint *endp;
+	struct coresight_event *event;
+};
+
+METHOD void stop {
+	device_t dev;
+	struct endpoint *endp;
+	struct coresight_event *event;
+};
+
 METHOD int enable {
 	device_t dev;
 	struct endpoint *endp;
@@ -51,6 +68,10 @@
 	struct coresight_event *event;
 };
 
+METHOD void dump {
+	device_t dev;
+};
+
 METHOD int read {
 	device_t dev;
 	struct endpoint *endp;
Index: sys/arm64/coresight/coresight_replicator.c
===================================================================
--- sys/arm64/coresight/coresight_replicator.c
+++ sys/arm64/coresight/coresight_replicator.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by BAE Systems, the University of Cambridge
@@ -64,7 +64,7 @@
 }
 
 static int
-replicator_enable(device_t dev, struct endpoint *endp,
+replicator_start(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
 {
 	struct replicator_softc *sc;
@@ -84,7 +84,7 @@
 }
 
 static void
-replicator_disable(device_t dev, struct endpoint *endp,
+replicator_stop(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
 {
 	struct replicator_softc *sc;
@@ -119,8 +119,8 @@
 static device_method_t replicator_methods[] = {
 	/* Coresight interface */
 	DEVMETHOD(coresight_init,	replicator_init),
-	DEVMETHOD(coresight_enable,	replicator_enable),
-	DEVMETHOD(coresight_disable,	replicator_disable),
+	DEVMETHOD(coresight_start,	replicator_start),
+	DEVMETHOD(coresight_stop,	replicator_stop),
 	DEVMETHOD_END
 };
 
Index: sys/arm64/coresight/coresight_tmc.h
===================================================================
--- sys/arm64/coresight/coresight_tmc.h
+++ sys/arm64/coresight/coresight_tmc.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -119,7 +119,7 @@
 DECLARE_CLASS(tmc_driver);
 
 struct tmc_softc {
-	struct resource			*res;
+	struct resource			*res[2];
 	device_t			dev;
 	uint64_t			cycle;
 	struct coresight_platform_data	*pdata;
@@ -128,10 +128,13 @@
 #define	CORESIGHT_ETR			1
 #define	CORESIGHT_ETF			2
 	uint32_t			nev;
-	struct coresight_event		*event;
 	boolean_t			etf_configured;
+	boolean_t			scatter_gather;
+	void				*intrhand;
 };
 
+typedef uint32_t sgte_t;
+
 int tmc_attach(device_t dev);
 
 #endif /* !_ARM64_CORESIGHT_CORESIGHT_TMC_H_ */
Index: sys/arm64/coresight/coresight_tmc.c
===================================================================
--- sys/arm64/coresight/coresight_tmc.c
+++ sys/arm64/coresight/coresight_tmc.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2018-2020 Ruslan Bukin 
+ * Copyright (c) 2018-2023 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -39,6 +39,14 @@
 #include 
 #include 
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
 #include 
 #include 
 
@@ -46,18 +54,85 @@
 
 #define	TMC_DEBUG
 #undef TMC_DEBUG
-        
+
 #ifdef TMC_DEBUG
 #define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
 #else
 #define	dprintf(fmt, ...)
 #endif
 
+#define	SG_PT_ENTIRES_PER_PAGE	(PAGE_SIZE / sizeof(sgte_t))
+#define	ETR_SG_ET_MASK			0x3
+#define	ETR_SG_ET_LAST			0x1
+#define	ETR_SG_ET_NORMAL		0x2
+#define	ETR_SG_ET_LINK			0x3
+
+#define	ETR_SG_PAGE_SHIFT		12
+#define	ETR_SG_ADDR_SHIFT		4
+
+/* Build an SG table entry: address bits above bit 11 plus a 2-bit type. */
+#define	ETR_SG_ENTRY(addr, type)	((sgte_t)((((addr) >> \
+	    ETR_SG_PAGE_SHIFT) << ETR_SG_ADDR_SHIFT) | ((type) & ETR_SG_ET_MASK)))
+
 static struct resource_spec tmc_spec[] = {
 	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
+	{ SYS_RES_IRQ,		0,	RF_ACTIVE | RF_OPTIONAL },
 	{ -1, 0 }
 };
 
+/*
+ * Allocate npages wired, zeroed pages one at a time into pages[].  Returns
+ * ENOMEM (without releasing pages already allocated) after three retries.
+ */
+static int
+tmc_alloc_pages(struct tmc_softc *sc, vm_page_t *pages, int npages)
+{
+	vm_paddr_t low, high, boundary;
+	vm_memattr_t memattr;
+	int alignment;
+	vm_pointer_t va;
+	int pflags;
+	vm_page_t m;
+	int tries;
+	int i;
+
+	alignment = PAGE_SIZE;
+	low = 0;
+	high = -1UL;
+	boundary = 0;
+	pflags = VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED |
+	    VM_ALLOC_ZERO;
+	memattr = VM_MEMATTR_DEFAULT;
+
+	for (i = 0; i < npages; i++) {
+		tries = 0;
+retry:
+		m = vm_page_alloc_noobj_contig(pflags, 1, low, high,
+		    alignment, boundary, memattr);
+		if (m == NULL) {
+			if (tries < 3) {
+				if (!vm_page_reclaim_contig(pflags, 1, low,
+				    high, alignment, boundary))
+					vm_wait(NULL);
+				tries++;
+				goto retry;
+			}
+			return (ENOMEM);
+		}
+
+		if ((m->flags & PG_ZERO) == 0)
+			pmap_zero_page(m);
+		va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+		cpu_dcache_wb_range(va, PAGE_SIZE);
+		cpu_dcache_inv_range(va, PAGE_SIZE);
+		m->valid = VM_PAGE_BITS_ALL;
+		m->oflags &= ~VPO_UNMANAGED;
+		m->flags |= PG_FICTITIOUS;
+		pages[i] = m;
+	}
+	return (0);
+}
+
 static int
 tmc_start(device_t dev)
 {
@@ -66,21 +141,27 @@
 
 	sc = device_get_softc(dev);
 
-	if (bus_read_4(sc->res, TMC_CTL) & CTL_TRACECAPTEN)
+	if (sc->dev_type == CORESIGHT_ETR) {
+		dprintf("%s%d\n", __func__, device_get_unit(dev));
+	}
+
+	if (bus_read_4(sc->res[0], TMC_CTL) & CTL_TRACECAPTEN)
 		return (-1);
 
 	/* Enable TMC */
-	bus_write_4(sc->res, TMC_CTL, CTL_TRACECAPTEN);
-	if ((bus_read_4(sc->res, TMC_CTL) & CTL_TRACECAPTEN) == 0)
+	bus_write_4(sc->res[0], TMC_CTL, CTL_TRACECAPTEN);
+	if ((bus_read_4(sc->res[0], TMC_CTL) & CTL_TRACECAPTEN) == 0)
 		panic("Not enabled\n");
 
 	do {
-		reg = bus_read_4(sc->res, TMC_STS);
+		reg = bus_read_4(sc->res[0], TMC_STS);
 	} while ((reg & STS_TMCREADY) == 1);
 
-	if ((bus_read_4(sc->res, TMC_CTL) & CTL_TRACECAPTEN) == 0)
+	if ((bus_read_4(sc->res[0], TMC_CTL) & CTL_TRACECAPTEN) == 0)
 		panic("Not enabled\n");
 
+	dprintf("%s: enabled\n", __func__);
+
 	return (0);
 }
 
@@ -92,17 +173,49 @@
 
 	sc = device_get_softc(dev);
 
-	reg = bus_read_4(sc->res, TMC_CTL);
+	dprintf("%s\n", __func__);
+
+	reg = bus_read_4(sc->res[0], TMC_CTL);
 	reg &= ~CTL_TRACECAPTEN;
-	bus_write_4(sc->res, TMC_CTL, reg);
+	bus_write_4(sc->res[0], TMC_CTL, reg);
 
 	do {
-		reg = bus_read_4(sc->res, TMC_STS);
+		reg = bus_read_4(sc->res[0], TMC_STS);
 	} while ((reg & STS_TMCREADY) == 1);
 
 	return (0);
 }
 
+/* Print ETR registers; prints nothing unless DEVID reports an ETR. */
+static void
+tmc_dump(device_t dev)
+{
+	struct tmc_softc *sc;
+	uint64_t hi, lo, rrp, rwp;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	/* Each 64-bit pointer is read as a low and a high 32-bit register. */
+	lo = bus_read_4(sc->res[0], TMC_RRP);
+	hi = bus_read_4(sc->res[0], TMC_RRPHI);
+	rrp = lo | (hi << 32);
+
+	lo = bus_read_4(sc->res[0], TMC_RWP);
+	hi = bus_read_4(sc->res[0], TMC_RWPHI);
+	rwp = lo | (hi << 32);
+
+	reg = bus_read_4(sc->res[0], TMC_DEVID);
+	if ((reg & DEVID_CONFIGTYPE_M) == DEVID_CONFIGTYPE_ETR)
+		printf("%s%d: STS %x CTL %x RSZ %x RRP %jx RWP %jx AXICTL %x\n",
+		    __func__, device_get_unit(dev),
+		    bus_read_4(sc->res[0], TMC_STS),
+		    bus_read_4(sc->res[0], TMC_CTL),
+		    bus_read_4(sc->res[0], TMC_RSZ),
+		    (uintmax_t)rrp, (uintmax_t)rwp,
+		    bus_read_4(sc->res[0], TMC_AXICTL));
+}
+
 static int
 tmc_configure_etf(device_t dev)
 {
@@ -112,23 +225,14 @@
 	sc = device_get_softc(dev);
 
 	do {
-		reg = bus_read_4(sc->res, TMC_STS);
+		reg = bus_read_4(sc->res[0], TMC_STS);
 	} while ((reg & STS_TMCREADY) == 0);
 
-	bus_write_4(sc->res, TMC_MODE, MODE_HW_FIFO);
-	bus_write_4(sc->res, TMC_FFCR, FFCR_EN_FMT | FFCR_EN_TI);
+	bus_write_4(sc->res[0], TMC_MODE, MODE_HW_FIFO);
+	bus_write_4(sc->res[0], TMC_FFCR, FFCR_EN_FMT | FFCR_EN_TI);
 
 	tmc_start(dev);
-
-	dprintf("%s: STS %x, CTL %x, RSZ %x, RRP %x, RWP %x, "
-	    "LBUFLEVEL %x, CBUFLEVEL %x\n", __func__,
-	    bus_read_4(sc->res, TMC_STS),
-	    bus_read_4(sc->res, TMC_CTL),
-	    bus_read_4(sc->res, TMC_RSZ),
-	    bus_read_4(sc->res, TMC_RRP),
-	    bus_read_4(sc->res, TMC_RWP),
-	    bus_read_4(sc->res, TMC_CBUFLEVEL),
-	    bus_read_4(sc->res, TMC_LBUFLEVEL));
+	tmc_dump(dev);
 
 	return (0);
 }
@@ -142,44 +246,172 @@
 
 	sc = device_get_softc(dev);
 
-	tmc_stop(dev);
-
 	do {
-		reg = bus_read_4(sc->res, TMC_STS);
+		reg = bus_read_4(sc->res[0], TMC_STS);
 	} while ((reg & STS_TMCREADY) == 0);
 
 	/* Configure TMC */
-	bus_write_4(sc->res, TMC_MODE, MODE_CIRCULAR_BUFFER);
+	bus_write_4(sc->res[0], TMC_MODE, MODE_CIRCULAR_BUFFER);
 
 	reg = AXICTL_PROT_CTRL_BIT1;
 	reg |= AXICTL_WRBURSTLEN_16;
-
-	/*
-	 * SG operation is broken on DragonBoard 410c
-	 * reg |= AXICTL_SG_MODE;
-	 */
-
-	reg |= AXICTL_AXCACHE_OS;
-	bus_write_4(sc->res, TMC_AXICTL, reg);
+	if (sc->scatter_gather)
+		reg |= AXICTL_SG_MODE;
+	/* reg |= AXICTL_AXCACHE_OS; */
+	bus_write_4(sc->res[0], TMC_AXICTL, reg);
 
 	reg = FFCR_EN_FMT | FFCR_EN_TI | FFCR_FON_FLIN |
 	    FFCR_FON_TRIG_EVT | FFCR_TRIGON_TRIGIN;
-	bus_write_4(sc->res, TMC_FFCR, reg);
+	bus_write_4(sc->res[0], TMC_FFCR, reg);
 
-	bus_write_4(sc->res, TMC_TRG, 8);
+	bus_write_4(sc->res[0], TMC_TRG, 8);
 
-	bus_write_4(sc->res, TMC_DBALO, event->etr.low);
-	bus_write_4(sc->res, TMC_DBAHI, event->etr.high);
-	bus_write_4(sc->res, TMC_RSZ, event->etr.bufsize / 4);
-
-	bus_write_4(sc->res, TMC_RRP, event->etr.low);
-	bus_write_4(sc->res, TMC_RWP, event->etr.low);
+	if (sc->scatter_gather) {
+		dprintf("%s: event->etr.pages %p\n", __func__,
+		    event->etr.pages);
+		dprintf("%s: event->etr.npages %d\n", __func__,
+		    event->etr.npages);
+	} else {
+		bus_write_4(sc->res[0], TMC_DBALO, event->etr.low);
+		bus_write_4(sc->res[0], TMC_DBAHI, event->etr.high);
+		bus_write_4(sc->res[0], TMC_RSZ, event->etr.bufsize / 4);
+		bus_write_4(sc->res[0], TMC_RRP, event->etr.low);
+		bus_write_4(sc->res[0], TMC_RWP, event->etr.low);
+	}
 
-	reg = bus_read_4(sc->res, TMC_STS);
+	reg = bus_read_4(sc->res[0], TMC_STS);
 	reg &= ~STS_FULL;
-	bus_write_4(sc->res, TMC_STS, reg);
+	bus_write_4(sc->res[0], TMC_STS, reg);
 
-	tmc_start(dev);
+	return (0);
+}
+
+/*
+ * Build the ETR scatter-gather table for pages[] in npt newly allocated
+ * table pages.  Returns the table page array, or NULL if allocation fails.
+ */
+static vm_page_t *
+tmc_allocate_pgdir(struct tmc_softc *sc, vm_page_t *pages, int nentries,
+    int npt)
+{
+	vm_page_t *pt_dir;
+	vm_paddr_t paddr;
+	sgte_t *ptr;
+	uint32_t dirpg;
+	int curpg, i, sgtentry, type;
+
+	pt_dir = malloc(sizeof(struct vm_page *) * npt, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	if (tmc_alloc_pages(sc, pt_dir, npt) != 0) {
+		printf("%s: could not allocate pages\n", __func__);
+		/* Do not leak the page array on the error path. */
+		free(pt_dir, M_DEVBUF);
+		return (NULL);
+	}
+
+	sgtentry = 0;
+	curpg = 0;
+	ptr = (sgte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pt_dir[0]));
+	dirpg = 1;
+
+	for (i = 0; i < nentries - 1; i++) {
+		dprintf("entry %d dirpg %d\n", i, dirpg);
+
+		if (sgtentry == (SG_PT_ENTIRES_PER_PAGE - 1)) {
+			type = ETR_SG_ET_LINK;
+			paddr = VM_PAGE_TO_PHYS(pt_dir[dirpg]);
+		} else {
+			type = ETR_SG_ET_NORMAL;
+			paddr = VM_PAGE_TO_PHYS(pages[curpg]);
+#ifdef TMC_DEBUG
+			if ((i % 100) == 0)
+				dprintf("%s: entry (%d/%d) type %d dirpg %d "
+				    "curpg %d paddr %lx\n", __func__, i,
+				    nentries, type, dirpg, curpg, paddr);
+#endif
+
+			curpg++;
+		}
+
+		*ptr = ETR_SG_ENTRY(paddr, type);
+		cpu_dcache_wb_range((vm_pointer_t)ptr, sizeof(sgte_t));
+		ptr++;
+
+		/* Take next directory page. */
+		if (type == ETR_SG_ET_LINK) {
+			ptr = (sgte_t *)PHYS_TO_DMAP(
+				VM_PAGE_TO_PHYS(pt_dir[dirpg]));
+			dirpg++;
+		}
+
+		sgtentry = (sgtentry + 1) % SG_PT_ENTIRES_PER_PAGE;
+	}
+
+	/* Last entry. */
+	paddr = VM_PAGE_TO_PHYS(pages[curpg]);
+	*ptr = ETR_SG_ENTRY(paddr, ETR_SG_ET_LAST);
+	cpu_dcache_wb_range((vm_pointer_t)ptr, sizeof(sgte_t));
+
+	return (pt_dir);
+}
+
+/*
+ * Coresight configure method: for a scatter-gather ETR, build the SG table
+ * over event->etr.pages and program DBALO/DBAHI/RSZ.  Otherwise a no-op.
+ */
+static int
+tmc_configure(device_t dev, struct coresight_event *event)
+{
+	struct tmc_softc *sc;
+	vm_page_t *pages, *pt_dir;
+	uint64_t pbase;
+	uint32_t reg;
+	int nentries, nlinks, npages, npt;
+#ifdef TMC_DEBUG
+	sgte_t *ptr;
+	int i;
+#endif
+
+	sc = device_get_softc(dev);
+
+	reg = bus_read_4(sc->res[0], TMC_DEVID);
+	if ((reg & DEVID_CONFIGTYPE_M) != DEVID_CONFIGTYPE_ETR)
+		return (0);
+	if (!sc->scatter_gather)
+		return (0);
+
+	npages = event->etr.npages;
+	pages = event->etr.pages;
+	if (npages <= 0 || pages == NULL)
+		return (EINVAL);
+
+	nlinks = npages / (SG_PT_ENTIRES_PER_PAGE - 1);
+	if (nlinks && ((npages % (SG_PT_ENTIRES_PER_PAGE - 1)) < 2))
+		nlinks--;
+	nentries = nlinks + npages;
+	npt = howmany(nentries, SG_PT_ENTIRES_PER_PAGE);
+	dprintf("%s: nentries %d, npt %d\n", __func__, nentries, npt);
+
+	/* NOTE(review): pt_dir is not saved anywhere, so it is never freed. */
+	pt_dir = tmc_allocate_pgdir(sc, pages, nentries, npt);
+	if (pt_dir == NULL)
+		return (ENOMEM);
+
+#ifdef TMC_DEBUG
+	ptr = (sgte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pt_dir[0]));
+	for (i = 0; i < nentries; i++)
+		dprintf("%s: entry %x\n", __func__, *ptr++);
+#endif
+
+	dprintf("%s: event->etr.pages %p\n", __func__, event->etr.pages);
+	dprintf("%s: event->etr.npages %d\n", __func__, event->etr.npages);
+
+	pbase = (uint64_t)VM_PAGE_TO_PHYS(pt_dir[0]);
+	dprintf("%s: pbase %lx\n", __func__, pbase);
+
+	bus_write_4(sc->res[0], TMC_DBALO, pbase & 0xffffffff);
+	bus_write_4(sc->res[0], TMC_DBAHI, pbase >> 32);
+	bus_write_4(sc->res[0], TMC_RSZ, (event->etr.npages * 4096) / 4);
+
+	return (0);
+}
@@ -193,12 +425,12 @@
 	sc = device_get_softc(dev);
 
 	/* Unlock Coresight */
-	bus_write_4(sc->res, CORESIGHT_LAR, CORESIGHT_UNLOCK);
+	bus_write_4(sc->res[0], CORESIGHT_LAR, CORESIGHT_UNLOCK);
 
 	/* Unlock TMC */
-	bus_write_4(sc->res, TMC_LAR, CORESIGHT_UNLOCK);
+	bus_write_4(sc->res[0], TMC_LAR, CORESIGHT_UNLOCK);
 
-	reg = bus_read_4(sc->res, TMC_DEVID);
+	reg = bus_read_4(sc->res[0], TMC_DEVID);
 	reg &= DEVID_CONFIGTYPE_M;
 	switch (reg) {
 	case DEVID_CONFIGTYPE_ETR:
@@ -222,7 +454,7 @@
 }
 
 static int
-tmc_enable(device_t dev, struct endpoint *endp,
+tmc_start_event(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
 {
 	struct tmc_softc *sc;
@@ -241,22 +473,17 @@
 	 * We allow only one running configuration.
 	 */
 
-	if (event->etr.flags & ETR_FLAG_ALLOCATE) {
-		event->etr.flags &= ~ETR_FLAG_ALLOCATE;
-		nev = atomic_fetchadd_int(&sc->nev, 1);
-		if (nev == 0) {
-			sc->event = event;
-			tmc_stop(dev);
-			tmc_configure_etr(dev, endp, event);
-			tmc_start(dev);
-		}
+	nev = atomic_fetchadd_int(&sc->nev, 1);
+	if (nev == 0) {
+		tmc_configure_etr(dev, endp, event);
+		tmc_start(dev);
 	}
 
 	return (0);
 }
 
 static void
-tmc_disable(device_t dev, struct endpoint *endp,
+tmc_stop_event(device_t dev, struct endpoint *endp,
     struct coresight_event *event)
 {
 	struct tmc_softc *sc;
@@ -270,43 +497,54 @@
 
 	KASSERT(sc->dev_type == CORESIGHT_ETR, ("Wrong dev_type"));
 
-	if (event->etr.flags & ETR_FLAG_RELEASE) {
-		event->etr.flags &= ~ETR_FLAG_RELEASE;
-		nev = atomic_fetchadd_int(&sc->nev, -1);
-		if (nev == 1) {
-			tmc_stop(dev);
-			sc->event = NULL;
-		}
-	}
+	nev = atomic_fetchadd_int(&sc->nev, -1);
+	if (nev == 1)
+		tmc_stop(dev);
+}
+
+/* Interrupt handler placeholder: no handling is implemented, it panics. */
+static void
+tmc_intr(void *arg)
+{
+
+	/* TODO */
+	panic("unhandled interrupt");
+}
 
 static int
-tmc_read(device_t dev, struct endpoint *endp,
-    struct coresight_event *event)
+tmc_read(device_t dev, struct endpoint *endp, struct coresight_event *event)
 {
 	struct tmc_softc *sc;
-	uint32_t cur_ptr;
+	vm_page_t page;
+	bool found;
+	uint64_t lo, hi;
+	uint64_t ptr;
+	int i;
 
 	sc = device_get_softc(dev);
-
 	if (sc->dev_type == CORESIGHT_ETF)
 		return (0);
 
-	/*
-	 * Ensure the event we are reading information for
-	 * is currently configured one.
-	 */
-	if (sc->event != event)
-		return (0);
+	lo = bus_read_4(sc->res[0], TMC_RWP);
+	hi = bus_read_4(sc->res[0], TMC_RWPHI);
+	ptr = lo | (hi << 32);
 
-	if (bus_read_4(sc->res, TMC_STS) & STS_FULL) {
-		event->etr.offset = 0;
-		event->etr.cycle++;
-		tmc_stop(dev);
-		tmc_start(dev);
-	} else {
-		cur_ptr = bus_read_4(sc->res, TMC_RWP);
-		event->etr.offset = (cur_ptr - event->etr.low);
+	page = PHYS_TO_VM_PAGE(ptr);
+
+	found = false;
+
+	for (i = 0; i < event->etr.npages; i++) {
+		if (event->etr.pages[i] == page) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {
+		event->etr.curpage = i;
+		event->etr.curpage_offset = ptr & 0xfff;
+		dprintf("CUR_PTR %lx, page %d of %d, offset %ld\n",
+		    ptr, i, event->etr.npages, event->etr.curpage_offset);
 	}
 
 	return (0);
@@ -317,18 +555,36 @@
 {
 	struct coresight_desc desc;
 	struct tmc_softc *sc;
+	uint32_t reg;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
-	if (bus_alloc_resources(dev, tmc_spec, &sc->res) != 0) {
+	if (bus_alloc_resources(dev, tmc_spec, sc->res) != 0) {
 		device_printf(dev, "cannot allocate resources for device\n");
 		return (ENXIO);
 	}
 
+	if (sc->res[1] != NULL) {
+		if (bus_setup_intr(dev, sc->res[1],
+		    INTR_TYPE_MISC | INTR_MPSAFE, NULL, tmc_intr, sc,
+		    &sc->intrhand)) {
+			bus_release_resources(dev, tmc_spec, sc->res);
+			device_printf(dev, "cannot setup interrupt handler\n");
+			return (ENXIO);
+		}
+	}
+
 	desc.pdata = sc->pdata;
 	desc.dev = dev;
-	desc.dev_type = CORESIGHT_TMC;
+
+	reg = bus_read_4(sc->res[0], TMC_DEVID);
+	reg &= DEVID_CONFIGTYPE_M;
+	if (reg == DEVID_CONFIGTYPE_ETR)
+		desc.dev_type = CORESIGHT_TMC_ETR;
+	else
+		desc.dev_type = CORESIGHT_TMC_ETF;
+
 	coresight_register(&desc);
 
 	return (0);
@@ -340,8 +596,10 @@
 
 	/* Coresight interface */
 	DEVMETHOD(coresight_init,	tmc_init),
-	DEVMETHOD(coresight_enable,	tmc_enable),
-	DEVMETHOD(coresight_disable,	tmc_disable),
+	DEVMETHOD(coresight_configure,	tmc_configure),
+	DEVMETHOD(coresight_start,	tmc_start_event),
+	DEVMETHOD(coresight_stop,	tmc_stop_event),
+	DEVMETHOD(coresight_dump,	tmc_dump),
 	DEVMETHOD(coresight_read,	tmc_read),
 	DEVMETHOD_END
 };
Index: sys/arm64/coresight/coresight_tmc_fdt.c
===================================================================
--- sys/arm64/coresight/coresight_tmc_fdt.c
+++ sys/arm64/coresight/coresight_tmc_fdt.c
@@ -71,10 +71,20 @@
 tmc_fdt_attach(device_t dev)
 {
 	struct tmc_softc *sc;
+	phandle_t node;
+	ssize_t len;
 
 	sc = device_get_softc(dev);
 	sc->pdata = coresight_fdt_get_platform_data(dev);
 
+	node = ofw_bus_get_node(dev);
+
+	len = OF_getproplen(node, "arm,scatter-gather");
+	if (len >= 0)
+		sc->scatter_gather = true;
+	else
+		sc->scatter_gather = false;
+
 	return (tmc_attach(dev));
 }