#!/usr/bin/env python3
"""TCP/UDP proxy for NetGDB.

Translates between Debugnet UDP GDB packets from a panicked kernel and TCP GDB
packets from a client.
"""

# CEM: You'll see a mix of debugnet and ND or Netdump names and strings
# throughout this file.  The original used netdump/ND everywhere, and I only
# changed things I touched.  A later version will clean up the remaining
# references to netdump; this is just a working rough draft.

import argparse
import asyncio
from enum import Enum
import logging
import select
import socket
import struct
import signal
import sys
import time

# Wire layouts (big-endian): the message header is
# (type, seqno, aux1, length, aux2); an ack is just the seqno.
ND_HDR_FORMAT = '>IIQII'
ND_ACK_FORMAT = '>I'
NETDUMP_HDR_SIZE = struct.calcsize(ND_HDR_FORMAT)
DEBUGNET_ACK_SIZE = struct.calcsize(ND_ACK_FORMAT)

# Pick the same low minimum as, say, DNS, until we're actually MTU and
# fragmentation aware.
TX_PAYLOAD_SIZE = 512 - NETDUMP_HDR_SIZE

# Accept arbitrarily large UDP packets from the network stack.
# (INET and INET6 both have 16 bit max payload sizes.)
#
# The constant is reused for non-blocking TCP recv as well; there, it
# is just an arbitrary value.
RX_BUFFER_SIZE = 2**16

NETDUMP_MAX_IN_FLIGHT = 64
ND_RETRIES = 10
HERALD_PORT = 20025

# Main-loop timing, in seconds: how long select() may block, and how long
# transmitted packets may stay unacked before they are retransmitted.
SELECT_TIMEOUT = 0.2
ACK_TIMEOUT = 0.5

tcp_port = 0
client_addr = None
debugnet_addr = None
debugnet_socket = None
tcp_socket = None  # The listening socket
tcp_connection = None  # Actual client connection
seqno = 1
debugnet_unacked_tx = {}  # seqno -> full packet bytes, kept for retransmit
tcp_partial_rx = []  # bytes chunks of a GDB packet not yet fully received

# DEBUGGING - to print all messages, set to logging.DEBUG
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

profiling = True

_HEX_DIGITS = frozenset(b'0123456789abcdefABCDEF')


def _printable(data):
    """Decode wire bytes for log output without ever raising.

    Payloads are arbitrary bytes (and may be cut mid-character when chunked),
    so undecodable sequences are replaced rather than allowed to raise
    UnicodeDecodeError.  Valid UTF-8 decodes exactly as before.
    """
    return data.decode('utf-8', errors='replace')


class Profiler:
    """Accumulates how long the node and the client take to respond."""

    def __init__(self):
        self.node_total_resp_time = 0
        self.client_total_resp_time = 0
        self.send_time = None
        self.message = None
        self.recipient = None

    def record_send(self, recipient, message):
        """Note that `message` was just sent to `recipient`.

        `recipient` must be 'node' or 'client'; anything else raises
        ValueError.  Does nothing when profiling is disabled.
        """
        if not profiling:
            return
        if recipient not in {"node", "client"}:
            raise ValueError("profiler: recipient must be either 'node' or "
                             "'client'.")
        self.send_time = time.time()
        self.message = message
        self.recipient = recipient

    def record_receive(self):
        """Charge the time since the last recorded send to its recipient.

        Does nothing when profiling is disabled or nothing has been sent yet.
        """
        if not profiling or not self.send_time:
            return
        response_time = time.time() - self.send_time
        if response_time > 1.0:
            logging.debug("%s responded in %f\nDelayed packet: %s" %
                          (self.recipient, response_time,
                           repr(self.message)))
        if self.recipient == 'node':
            self.node_total_resp_time += response_time
        else:
            self.client_total_resp_time += response_time


profiler = Profiler()


class netdump_hdr_type(Enum):
    """Values carried in the `type` field of a message header."""
    HERALD = 1
    FINISHED = 2
    DATA = 3


class netdump_msg_hdr:
    """A debugnet/netdump message header.

    With no (or empty) `data` a zeroed HERALD header is built; otherwise the
    first NETDUMP_HDR_SIZE bytes of `data` are unpacked.  `data` must then be
    at least NETDUMP_HDR_SIZE bytes long or struct.error is raised.
    """

    def __init__(self, data=None):
        self.size = NETDUMP_HDR_SIZE
        if not data:
            self.type = netdump_hdr_type.HERALD.value
            self.seqno = 0
            self.aux1 = 0
            self.length = 0
            self.aux2 = 0
        else:
            (self.type, self.seqno, self.aux1, self.length,
             self.aux2) = struct.unpack(ND_HDR_FORMAT,
                                        data[:NETDUMP_HDR_SIZE])

    def to_bytearray(self):
        """Return the packed header (a bytes object, despite the name)."""
        return struct.pack(ND_HDR_FORMAT, self.type, self.seqno, self.aux1,
                           self.length, self.aux2)

    def to_ack(self):
        """Return the packed ack for this header: just its seqno."""
        return struct.pack(ND_ACK_FORMAT, self.seqno)


def exit_handler(signum, frame):
    """Close sockets and connections and exit.

    Installed as the SIGINT handler and also called directly on fatal setup
    errors (with both arguments None).  Always ends in sys.exit(0).
    """
    if (profiler.node_total_resp_time and
            profiler.client_total_resp_time):
        logging.debug("Total time waiting for node: %f" %
                      profiler.node_total_resp_time)
        logging.debug("Total time waiting for client (includes user response "
                      "time): %f" % profiler.client_total_resp_time)

    if debugnet_socket is not None:
        finisher = netdump_msg_hdr()
        finisher.type = netdump_hdr_type.FINISHED.value
        try:
            debugnet_socket.send(finisher.to_bytearray())
        except OSError as err:
            # Best effort only: a failed goodbye must not prevent shutdown.
            logging.debug("Could not send FINISHED to NetGDB: %s", err)

    for sock in (tcp_connection, tcp_socket, debugnet_socket):
        if sock is not None:
            sock.close()
    sys.exit(0)


def openport(socket_type, port):
    """Set up a socket to be a server.

    Returns a non-blocking AF_INET socket of `socket_type` bound to `port` on
    all interfaces.  If the bind fails the process exits via exit_handler().
    """
    sock = socket.socket(socket.AF_INET, socket_type)
    sock.setblocking(0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        sock.bind(("", port))
    except socket.error:
        logging.critical("Could not bind to socket port %d, most likely "
                         "already in use. Check running processes and try "
                         "again." % port)
        sock.close()
        exit_handler(None, None)
    return sock


def node_connect():
    """Establish connection with NetGDB on node"""
    global debugnet_addr, debugnet_socket

    # Wait for herald packet from NetGDB
    herald_socket = openport(socket.SOCK_DGRAM, HERALD_PORT)
    herald_socket.setblocking(1)
    print("Waiting for connection from NetGDB client on :%d." % HERALD_PORT)
    print("Use 'netgdb -s ' at the 'db>' prompt to "
          "connect.")
    # TODO: enumerate machine interfaces via something like ioctl and print
    # possibilities.

    msgh = None
    while True:
        data, address = herald_socket.recvfrom(RX_BUFFER_SIZE)
        if len(data) < NETDUMP_HDR_SIZE:
            # Too short to hold a header; unpacking it would raise.
            logging.debug("Ignoring runt packet on herald port: %d bytes",
                          len(data))
            continue
        msgh = netdump_msg_hdr(data)
        if msgh.type == netdump_hdr_type.HERALD.value:
            break

    debugnet_addr = address
    herald_socket.close()

    # Send initial Ack for herald, which has the side effect of informing
    # NetGDB of the new port.
    debugnet_socket = openport(socket.SOCK_DGRAM, 0)
    debugnet_socket.setblocking(1)
    debugnet_socket.connect(debugnet_addr)
    debugnet_socket.send(msgh.to_ack())

    # Get and ack the T packet from client
    #
    # This is a kludge based on the current initial $T packet blasted out by
    # FreeBSD's gdb stub for inexplicable reasons (GDB is a debugger-driven
    # interrogation protocol; sending first makes no sense).
    while True:
        data = debugnet_socket.recv(RX_BUFFER_SIZE)
        if len(data) < NETDUMP_HDR_SIZE:
            logging.debug("Ignoring runt packet from node: %d bytes",
                          len(data))
            continue
        hdr = netdump_msg_hdr(data)
        data = data[NETDUMP_HDR_SIZE:]
        if data[:2] == b'$T':
            break
        logging.debug("Received non-T packet from node")
    debugnet_socket.send(hdr.to_ack())

    debugnet_socket.setblocking(0)
    print("Connection from NetGDB at %s received." % debugnet_addr[0])


def client_connect(tcp_port):
    """Wait for a client TCP connection.

    Creates the listening socket on first use, blocks in accept(), then
    leaves both the listener and the new connection non-blocking.
    """
    global tcp_socket, tcp_connection, client_addr

    if not tcp_socket:
        tcp_socket = openport(socket.SOCK_STREAM, tcp_port)
        tcp_socket.listen(1)

    tcp_socket.setblocking(1)
    bound_address = tcp_socket.getsockname()
    print("Waiting for connection from GDB client on port %d." %
          bound_address[1])
    print("Use 'target remote :%d' from gdb to connect." %
          bound_address[1])

    tcp_connection, client_addr = tcp_socket.accept()
    print("Connection from GDB client received.")
    tcp_socket.setblocking(0)
    tcp_connection.setblocking(0)


def find_new_client(tcp_port):
    """Close current client connection and wait for another."""
    logging.error("Client connection lost. Listening for new client.")
    tcp_connection.close()
    # Whatever half packet the old client left behind is meaningless now.
    tcp_partial_rx.clear()
    client_connect(tcp_port)


def verify_chksum(data):
    """Ensures the checksum in GDB messages is valid.

    `data` is one framed packet as bytes: b'$' + payload + b'#' + two hex
    digits.  Returns True only when the frame is well formed and the digits
    equal the sum of the payload bytes modulo 256.
    """
    if not data or len(data) < 4:
        return False
    # Compare one-byte slices: indexing bytes yields an int in Python 3, and
    # an int never equals b'$'.
    if data[:1] != b'$' or data[-3:-2] != b'#':
        return False

    digits = data[-2:]
    if any(ch not in _HEX_DIGITS for ch in digits):
        return False

    payload = data[1:-3]
    return (sum(payload) & 0xff) == int(digits, 16)


def udp_data_send_once(data):
    """Function to send a data chunk to the node over netdump.

    Splits `data` into chunks of at most TX_PAYLOAD_SIZE bytes, sends each
    with its own header and a fresh seqno, and records every packet in
    debugnet_unacked_tx for later retransmission.  Returns the number of
    packets sent (0 for empty input).
    """
    global seqno

    if not data:
        return 0

    datalen = len(data)
    hdr = netdump_msg_hdr()
    hdr.type = netdump_hdr_type.DATA.value
    # NOTE(review): 'padding' and 'offset' (below) are plain attributes that
    # to_bytearray() does not pack, so they never reach the wire.  Confirm
    # whether aux1/aux2 were meant to carry them.
    hdr.padding = 0

    pkts_sent = 0
    for pkt_start in range(0, datalen, TX_PAYLOAD_SIZE):
        seqno += 1
        pktlen = min(datalen - pkt_start, TX_PAYLOAD_SIZE)

        # Set up header information
        hdr.seqno = seqno
        hdr.offset = pkt_start
        hdr.length = pktlen

        # Package and send data
        payload = data[pkt_start:pkt_start + pktlen]
        packet = hdr.to_bytearray() + payload
        logging.debug("Sending packet %s to %s." %
                      (_printable(payload),
                       ':'.join(map(str, debugnet_addr))))
        debugnet_socket.send(packet)
        logging.debug("Expecting ack with seqno %d" % hdr.seqno)

        # Note that we're waiting for this packet and record a copy for
        # retransmit.
        debugnet_unacked_tx[seqno] = packet
        pkts_sent += 1

    logging.debug("Sent %d packets to node." % pkts_sent)
    return pkts_sent


def node_data_handler():
    """Takes data received from node's NetGDB and forwards it to the client.

    Ack-sized datagrams are handed to node_ack_handler() and its result is
    returned.  Data packets are acked, stripped of their header and sent to
    the client; runts are logged and dropped.  Both of those return None.
    """
    data = debugnet_socket.recv(RX_BUFFER_SIZE)
    profiler.record_receive()

    if len(data) == DEBUGNET_ACK_SIZE:
        return node_ack_handler(data)
    if len(data) < NETDUMP_HDR_SIZE:
        logging.error("Got runt packet from NetGDB: %d bytes" % len(data))
        return None

    # Received a data packet from node, ack and forward to client
    hdr = netdump_msg_hdr(data)
    data = data[hdr.size:]  # cut off header
    text = _printable(data)
    logging.debug("Received udp data packet: %s" % text)
    debugnet_socket.send(hdr.to_ack())
    tcp_connection.send(data)
    profiler.record_send('client', text)
    return None


# Handle an ACK from NetGDB.
#
# Return True if any unacked data is in flight.  False if all TX has been
# acked.
def node_ack_handler(data):
    """Retire the packet acked by `data` (a packed ND_ACK_FORMAT seqno)."""
    ack_seqno = struct.unpack(ND_ACK_FORMAT, data)[0]
    logging.debug("Received udp ack %d." % ack_seqno)

    if ack_seqno not in debugnet_unacked_tx:
        # Ignore unneeded or spurious ack
        return bool(debugnet_unacked_tx)

    # Packet is a current ack.
    del debugnet_unacked_tx[ack_seqno]
    if debugnet_unacked_tx:
        return True

    # All acks received, done sending packet(s)
    logging.debug("Found all acks for all packets sent.")
    return False


def forward_gdb_packet(data):
    """Send one GDB token (a '+'/'-' ack or a framed packet) to the node."""
    if len(data) > 1:
        rawdata = data[data.find(b'$'):data.find(b'#') + 3]
        # XXXCEM No real need for us to do this here.
        if not verify_chksum(rawdata):
            logging.error("Invalid checksum: %s" % _printable(rawdata))
            # FALLTHROUGH and let client reject.  Makes more sense
            # when client negotiates no-ack GDB mode (future work).

    # Forward data to node
    udp_data_send_once(data)
    profiler.record_send("node", data)


def _split_gdb_stream(buf):
    """Split a GDB byte stream into complete tokens and a trailing partial.

    Returns (tokens, remainder).  Each token is (is_valid, bytes): a '+' or
    '-' ack or a complete '$...#XX' frame is valid, and a run of bytes
    before the next '$' that is neither is invalid.  `remainder` is a frame
    that has started but whose '#' and both checksum bytes have not all
    arrived yet.
    """
    tokens = []
    while buf:
        lead = buf[:1]
        if lead in (b'+', b'-'):
            tokens.append((True, lead))
            buf = buf[1:]
        elif lead == b'$':
            end = buf.find(b'#')
            if end < 0 or len(buf) < end + 3:
                # The frame or its two checksum bytes are still in flight.
                break
            tokens.append((True, buf[:end + 3]))
            buf = buf[end + 3:]
        else:
            start = buf.find(b'$')
            if start < 0:
                start = len(buf)
            tokens.append((False, buf[:start]))
            buf = buf[start:]
    return tokens, buf


def client_data_handler():
    """Takes data received from client and forwards it to the node.

    Complete tokens are forwarded in order, unrecognized bytes are logged
    and dropped, and an incomplete trailing packet is kept in tcp_partial_rx
    until the rest arrives.  A lost connection triggers find_new_client().
    """
    try:
        # Both failing to recv and successfully empty receive (End of
        # Stream) are a Connection reset error.
        data = tcp_connection.recv(RX_BUFFER_SIZE)
        if not data:
            raise ConnectionResetError()
    except ConnectionError:
        find_new_client(tcp_socket.getsockname()[1])
        return

    profiler.record_receive()
    logging.debug("Received tcp data: %s" % _printable(data))

    # Valid possibilities in sequence, after any bytes held over from an
    # earlier receive:
    #   * '+' or '-' (native ack)
    #   * A full packet
    #
    # Valid trailing possibility:
    #   * A partial packet.
    pending = b''.join(tcp_partial_rx) + data
    tcp_partial_rx.clear()

    tokens, leftover = _split_gdb_stream(pending)
    for is_valid, token in tokens:
        if is_valid:
            forward_gdb_packet(token)
        else:
            logging.error("Unrecognized data from GDB, dropping "
                          "bytes until we see a packet frame: '%s'" %
                          _printable(token))
    if leftover:
        tcp_partial_rx.append(leftover)


def eventloop():
    """Connect to node and client, then shuttle packets until told to stop."""
    # Get connection from node and client
    node_connect()
    client_connect(tcp_port)

    # ### Main Loop ###
    # XXXCEM this is somewhat out of date and not fully accurate.
    # Currently assuming that the connection between client and server will be
    # half-duplex, i.e. only one talks at a time.
    # When a client message is received, we filter out anything not part of a
    # $msg#XX format. If anything is left, it is passed on to the
    # udp_data_send_once function, which cuts it into netdump packets, slaps
    # headers on them, and sends them off. In the meantime, the node will
    # be acking to the udp ack port, which we record.
    # When a server message is received, we ack it, cut the netdump header off,
    # and forward it to the client.
    logging.debug("Connections made. NetGDB: %s. GDB: %s" %
                  (':'.join(map(str, debugnet_addr)),
                   ':'.join(map(str, client_addr))))
    print("Starting proxy. Press ctrl-C to stop.")

    retries = 0
    send_timestamp = None
    while True:
        # The globals are re-read every pass: tcp_connection is replaced
        # whenever a new client is accepted.
        readers, _, _ = select.select([debugnet_socket, tcp_connection],
                                      [], [], SELECT_TIMEOUT)
        for reader in readers:
            if reader is debugnet_socket:
                node_data_handler()
            elif reader is tcp_connection:
                client_data_handler()
                retries = 0
                send_timestamp = time.time()

        # If not all packets have been acked after a while, try resending.
        # This is keyed on the unacked table itself, so a packet is resent
        # even when no ack at all has come back.
        if not debugnet_unacked_tx or send_timestamp is None:
            continue
        if time.time() - send_timestamp <= ACK_TIMEOUT:
            continue

        logging.error("Not all acks received. Resending packet(s).")
        if retries >= ND_RETRIES:
            logging.error("Out of retries! Assuming connectivity lost.")
            sys.exit(1)
        for k in sorted(debugnet_unacked_tx):
            logging.debug("Resending seqno %d to NetGDB.", k)
            debugnet_socket.send(debugnet_unacked_tx[k])
        retries += 1
        send_timestamp = time.time()


def main():
    """Parse the command line and run the proxy."""
    global tcp_port

    # Shutdown on Ctrl+C
    signal.signal(signal.SIGINT, exit_handler)

    # Parse args and set up port numbers
    parser = argparse.ArgumentParser(
        description="NetGDB Proxy. Allows GDB debugging on a paniced kernel "
                    "over the network.")
    parser.add_argument('-p', '--port', required=False, type=int,
                        help='GDB client listen port.')
    args = parser.parse_args()

    # If we weren't given a port, switch to a random one.
    if args.port:
        tcp_port = int(args.port)
    else:
        logging.debug("No given port, using random port.")

    eventloop()


if __name__ == "__main__":
    main()