#include "kvm/uip.h" #include #include #include #include #include static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how) { shutdown(sk->fd, how); if (sk->write_done && sk->read_done) { shutdown(sk->fd, SHUT_RDWR); close(sk->fd); mutex_lock(sk->lock); list_del(&sk->list); mutex_unlock(sk->lock); free(sk->buf); free(sk); } return 0; } static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport) { struct list_head *sk_head; struct mutex *sk_lock; struct uip_tcp_socket *sk; sk_head = &arg->info->tcp_socket_head; sk_lock = &arg->info->tcp_socket_lock; mutex_lock(sk_lock); list_for_each_entry(sk, sk_head, list) { if (sk->sip == sip && sk->dip == dip && sk->sport == sport && sk->dport == dport) { mutex_unlock(sk_lock); return sk; } } mutex_unlock(sk_lock); return NULL; } static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport) { struct list_head *sk_head; struct uip_tcp_socket *sk; struct mutex *sk_lock; struct uip_tcp *tcp; struct uip_ip *ip; int ret; tcp = (struct uip_tcp *)arg->eth; ip = (struct uip_ip *)arg->eth; sk_head = &arg->info->tcp_socket_head; sk_lock = &arg->info->tcp_socket_lock; sk = malloc(sizeof(*sk)); memset(sk, 0, sizeof(*sk)); sk->lock = sk_lock; sk->info = arg->info; sk->fd = socket(AF_INET, SOCK_STREAM, 0); sk->addr.sin_family = AF_INET; sk->addr.sin_port = dport; sk->addr.sin_addr.s_addr = dip; pthread_cond_init(&sk->cond, NULL); if (ntohl(dip) == arg->info->host_ip) sk->addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = connect(sk->fd, (struct sockaddr *)&sk->addr, sizeof(sk->addr)); if (ret) { free(sk); return NULL; } sk->sip = ip->sip; sk->dip = ip->dip; sk->sport = tcp->sport; sk->dport = tcp->dport; mutex_lock(sk_lock); list_add_tail(&sk->list, sk_head); mutex_unlock(sk_lock); return sk; } /* Caller holds the sk lock */ static void uip_tcp_socket_free(struct uip_tcp_socket *sk) { /* * Here we assume that the virtqueues are already inactive so we don't * race with uip_tx_do_ipv4_tcp. We are racing with * uip_tcp_socket_thread though, but holding the sk lock ensures that it * cannot free data concurrently. */ if (sk->thread) { pthread_cancel(sk->thread); pthread_join(sk->thread, NULL); } sk->write_done = sk->read_done = 1; uip_tcp_socket_close(sk, SHUT_RDWR); } static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_len) { struct uip_info *info; struct uip_eth *eth2; struct uip_tcp *tcp2; struct uip_buf *buf; struct uip_ip *ip2; info = sk->info; /* * Get free buffer to send data to guest */ buf = uip_buf_get_free(info); /* * Cook a ethernet frame */ tcp2 = (struct uip_tcp *)buf->eth; eth2 = (struct uip_eth *)buf->eth; ip2 = (struct uip_ip *)buf->eth; eth2->src = info->host_mac; eth2->dst = info->guest_mac; eth2->type = htons(UIP_ETH_P_IP); ip2->vhl = UIP_IP_VER_4 | UIP_IP_HDR_LEN; ip2->tos = 0; ip2->id = 0; ip2->flgfrag = 0; ip2->ttl = UIP_IP_TTL; ip2->proto = UIP_IP_P_TCP; ip2->csum = 0; ip2->sip = sk->dip; ip2->dip = sk->sip; tcp2->sport = sk->dport; tcp2->dport = sk->sport; tcp2->seq = htonl(sk->seq_server); tcp2->ack = htonl(sk->ack_server); /* * Diable TCP options, tcp hdr len equals 20 bytes */ tcp2->off = UIP_TCP_HDR_LEN; tcp2->flg = flag; tcp2->win = htons(UIP_TCP_WIN_SIZE); tcp2->csum = 0; tcp2->urgent = 0; if (payload_len > 0) memcpy(uip_tcp_payload(tcp2), sk->payload, payload_len); ip2->len = htons(uip_tcp_hdrlen(tcp2) + payload_len + uip_ip_hdrlen(ip2)); ip2->csum = uip_csum_ip(ip2); tcp2->csum = uip_csum_tcp(tcp2); /* * virtio_net_hdr */ buf->vnet_len = info->vnet_hdr_len; memset(buf->vnet, 0, buf->vnet_len); buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); /* * Increase server seq */ sk->seq_server += payload_len; /* * Send data received from socket to guest */ uip_buf_set_used(info, buf); return 0; } static void *uip_tcp_socket_thread(void *p) { struct uip_tcp_socket *sk; int len, left, ret; u8 *pos; kvm__set_thread_name("uip-tcp"); sk = p; while (1) { pos = sk->buf; ret = read(sk->fd, sk->buf, UIP_MAX_TCP_PAYLOAD); if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD) goto out; left = ret; while (left > 0) { mutex_lock(sk->lock); while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0) pthread_cond_wait(&sk->cond, &sk->lock->mutex); mutex_unlock(sk->lock); sk->payload = pos; if (len > left) len = left; if (len > UIP_MAX_TCP_PAYLOAD) len = UIP_MAX_TCP_PAYLOAD; left -= len; pos += len; uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len); } } out: /* * Close server to guest TCP connection */ uip_tcp_socket_close(sk, SHUT_RD); uip_tcp_payload_send(sk, UIP_TCP_FLAG_FIN | UIP_TCP_FLAG_ACK, 0); sk->seq_server += 1; sk->read_done = 1; pthread_exit(NULL); return NULL; } static int uip_tcp_socket_receive(struct uip_tcp_socket *sk) { int ret; if (sk->thread == 0) { sk->buf = malloc(UIP_MAX_TCP_PAYLOAD); if (!sk->buf) return -ENOMEM; ret = pthread_create(&sk->thread, NULL, uip_tcp_socket_thread, (void *)sk); if (ret) free(sk->buf); return ret; } return 0; } static int uip_tcp_socket_send(struct uip_tcp_socket *sk, struct uip_tcp *tcp) { int len; int ret; u8 *payload; if (sk->write_done) return 0; payload = uip_tcp_payload(tcp); len = uip_tcp_payloadlen(tcp); ret = write(sk->fd, payload, len); if (ret != len) pr_warning("tcp send error"); return ret; } int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg) { struct uip_tcp_socket *sk; struct uip_tcp *tcp; struct uip_ip *ip; int ret; tcp = (struct uip_tcp *)arg->eth; ip = (struct uip_ip *)arg->eth; /* * Guest is trying to start a TCP session, let's fake SYN-ACK to guest */ if (uip_tcp_is_syn(tcp)) { sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport); if (!sk) return -1; sk->window_size = ntohs(tcp->win); /* * Setup ISN number */ sk->isn_guest = uip_tcp_isn(tcp); sk->isn_server = uip_tcp_isn_alloc(); sk->seq_server = sk->isn_server; sk->ack_server = sk->isn_guest + 1; uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0); sk->seq_server += 1; /* * Start receive thread for data from remote to guest */ uip_tcp_socket_receive(sk); goto out; } /* * Find socket we have allocated */ sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport); if (!sk) return -1; mutex_lock(sk->lock); sk->window_size = ntohs(tcp->win); sk->guest_acked = ntohl(tcp->ack); pthread_cond_signal(&sk->cond); mutex_unlock(sk->lock); if (uip_tcp_is_fin(tcp)) { if (sk->write_done) goto out; sk->write_done = 1; sk->ack_server += 1; uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0); /* * Close guest to server TCP connection */ uip_tcp_socket_close(sk, SHUT_WR); goto out; } /* * Ignore guest to server frames with zero tcp payload */ if (uip_tcp_payloadlen(tcp) == 0) goto out; /* * Sent out TCP data to remote host */ ret = uip_tcp_socket_send(sk, tcp); if (ret < 0) return -1; /* * Send ACK to guest imediately */ sk->ack_server += ret; uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0); out: return 0; } void uip_tcp_exit(struct uip_info *info) { struct uip_tcp_socket *sk, *next; mutex_lock(&info->tcp_socket_lock); list_for_each_entry_safe(sk, next, &info->tcp_socket_head, list) uip_tcp_socket_free(sk); mutex_unlock(&info->tcp_socket_lock); }