#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
#include <string.h>
#include <signal.h>
#include <stddef.h>
#include <pthread.h>
#include <unistd.h>
#include <poll.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/virtio_net.h>  /* struct virtio_net_hdr_v1, VIRTIO_NET_F_MQ */
#include <linux/virtio_ring.h> /* struct vring_packed_desc, VRING_PACKED_* */

#include "virtio-net.h"
#include "err.h"
#include "vm.h"
#include "utils.h"

#define TAP_INTERFACE "tap%d"
#define VIRTQ_RX 0
#define VIRTQ_TX 1
#define NOTIFY_OFFSET 2

static volatile bool thread_stop = false;

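/* Poll the TAP device until it has a frame available for the RX queue.
 * A negative timeout makes poll() block indefinitely. */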
static int virtio_net_virtq_available_rx(struct virtio_net_dev *dev,
                                         int timeout)
{
    struct pollfd pollfd = (struct pollfd){
        .fd = dev->tapfd,
        .events = POLLIN,
    };
    return (poll(&pollfd, 1, timeout) > 0) && (pollfd.revents & POLLIN);
}

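/* Wait until the guest has kicked the TX queue (tx_ioeventfd readable) and
 * the TAP device can accept a frame (POLLOUT). Both conditions must show up
 * in the same poll() round for this to return true. */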
static int virtio_net_virtq_available_tx(struct virtio_net_dev *dev,
                                         int timeout)
{
    struct pollfd pollfds[] = {
        [0] = {.fd = dev->tx_ioeventfd, .events = POLLIN},
        [1] = {.fd = dev->tapfd, .events = POLLOUT},
    };

    int ret = poll(pollfds, 2, timeout);

    return ret > 0 && (pollfds[0].revents & POLLIN) &&
           (pollfds[1].revents & POLLOUT);
}

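/* RX worker thread: re-arm the event flag (the completion path sets it to
 * DISABLE once the queue runs dry), then service the queue whenever the TAP
 * device becomes readable, until virtio_net_exit() raises thread_stop. */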
static void *virtio_net_vq_avail_handler_rx(void *arg)
{
    struct virtq *vq = (struct virtq *) arg;
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;

    while (!__atomic_load_n(&thread_stop, __ATOMIC_RELAXED)) {
        vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
        if (virtio_net_virtq_available_rx(dev, -1))
            virtq_handle_avail(vq);
    }
    return NULL;
}

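/* TX worker thread: same loop as the RX side, driven by the guest's queue
 * kicks instead of incoming TAP traffic. */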
static void *virtio_net_vq_avail_handler_tx(void *arg)
{
    struct virtq *vq = (struct virtq *) arg;
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;

    while (!__atomic_load_n(&thread_stop, __ATOMIC_RELAXED)) {
        vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
        if (virtio_net_virtq_available_tx(dev, -1))
            virtq_handle_avail(vq);
    }
    return NULL;
}

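/* Driver enabled the RX queue: translate the guest-physical ring addresses
 * into host pointers, bind rx_ioeventfd to the queue's notify address
 * (presumably via KVM's ioeventfd mechanism, so kicks are delivered as
 * eventfd signals instead of trapping to userspace), and start the RX
 * worker. */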
static void virtio_net_enable_vq_rx(struct virtq *vq)
{
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;
    vm_t *v = container_of(dev, vm_t, virtio_net_dev);

    if (vq->info.enable)
        return;
    vq->info.enable = true;
    vq->desc_ring =
        (struct vring_packed_desc *) vm_guest_to_host(v, vq->info.desc_addr);
    vq->device_event = (struct vring_packed_desc_event *) vm_guest_to_host(
        v, vq->info.device_addr);
    vq->guest_event = (struct vring_packed_desc_event *) vm_guest_to_host(
        v, vq->info.driver_addr);
    uint64_t addr = virtio_pci_get_notify_addr(&dev->virtio_pci_dev, vq);
    vm_ioeventfd_register(v, dev->rx_ioeventfd, addr, NOTIFY_OFFSET, 0);
    pthread_create(&dev->rx_thread, NULL, virtio_net_vq_avail_handler_rx,
                   (void *) vq);
}

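/* TX counterpart of virtio_net_enable_vq_rx(): map the ring, bind
 * tx_ioeventfd to the notify address, and spawn the TX worker. */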
static void virtio_net_enable_vq_tx(struct virtq *vq)
{
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;
    vm_t *v = container_of(dev, vm_t, virtio_net_dev);

    if (vq->info.enable)
        return;
    vq->info.enable = true;
    vq->desc_ring =
        (struct vring_packed_desc *) vm_guest_to_host(v, vq->info.desc_addr);
    vq->device_event = (struct vring_packed_desc_event *) vm_guest_to_host(
        v, vq->info.device_addr);
    vq->guest_event = (struct vring_packed_desc_event *) vm_guest_to_host(
        v, vq->info.driver_addr);

    uint64_t addr = virtio_pci_get_notify_addr(&dev->virtio_pci_dev, vq);
    vm_ioeventfd_register(v, dev->tx_ioeventfd, addr, NOTIFY_OFFSET, 0);
    pthread_create(&dev->tx_thread, NULL, virtio_net_vq_avail_handler_tx,
                   (void *) vq);
}

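/* Tell the guest that RX buffers were consumed: writing to irqfd injects the
 * interrupt registered for this device in virtio_net_setup(). */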
static void virtio_net_notify_used_rx(struct virtq *vq)
{
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;
    uint64_t n = 1;
    if (write(dev->irqfd, &n, sizeof(n)) < 0)
        throw_err("Failed to write the irqfd");
}

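/* Identical to the RX path: kick irqfd to raise the device interrupt. */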
static void virtio_net_notify_used_tx(struct virtq *vq)
{
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;
    uint64_t n = 1;

    if (write(dev->irqfd, &n, sizeof(n)) < 0)
        throw_err("Failed to write the irqfd");
}

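/* Fill one available RX descriptor: write a zeroed virtio_net_hdr_v1, read a
 * single frame from the TAP device in after it, flip the USED flag, and latch
 * the ISR status bit. Only one descriptor is handled per call; if the
 * non-blocking read yields no data, the event flag is set to DISABLE until
 * the worker loop re-arms it. */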
void virtio_net_complete_request_rx(struct virtq *vq)
{
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;
    vm_t *v = container_of(dev, vm_t, virtio_net_dev);
    struct vring_packed_desc *desc;

    while ((desc = virtq_get_avail(vq)) != NULL) {
        uint8_t *data = vm_guest_to_host(v, desc->addr);
        struct virtio_net_hdr_v1 *virtio_hdr =
            (struct virtio_net_hdr_v1 *) data;
        memset(virtio_hdr, 0, sizeof(struct virtio_net_hdr_v1));

        virtio_hdr->num_buffers = 1;

        size_t virtio_header_len = sizeof(struct virtio_net_hdr_v1);
        ssize_t read_bytes = read(dev->tapfd, data + virtio_header_len,
                                  desc->len - virtio_header_len);
        if (read_bytes < 0) {
            /* Nothing to read (O_NONBLOCK): stop until the next poll round. */
            vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
            return;
        }
        desc->len = virtio_header_len + read_bytes;

        desc->flags ^= (1ULL << VRING_PACKED_DESC_F_USED);
        dev->virtio_pci_dev.config.isr_cap.isr_status |= VIRTIO_PCI_ISR_QUEUE;
        return;
    }
    vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
}

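/* Drain one available TX descriptor: skip the leading virtio_net_hdr_v1,
 * push the payload to the TAP device with writev(), then flip the USED flag
 * and latch the ISR status bit. As on the RX side, one descriptor is handled
 * per call. */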
void virtio_net_complete_request_tx(struct virtq *vq)
{
    struct virtio_net_dev *dev = (struct virtio_net_dev *) vq->dev;
    vm_t *v = container_of(dev, vm_t, virtio_net_dev);
    struct vring_packed_desc *desc;
    while ((desc = virtq_get_avail(vq)) != NULL) {
        uint8_t *data = vm_guest_to_host(v, desc->addr);
        size_t virtio_header_len = sizeof(struct virtio_net_hdr_v1);

        if (desc->len < virtio_header_len) {
            /* Malformed buffer: too short to even hold the virtio header. */
            vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
            return;
        }

        uint8_t *actual_data = data + virtio_header_len;
        size_t actual_data_len = desc->len - virtio_header_len;

        struct iovec iov[1];
        iov[0].iov_base = actual_data;
        iov[0].iov_len = actual_data_len;

        ssize_t write_bytes = writev(dev->tapfd, iov, 1);
        if (write_bytes < 0) {
            vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
            return;
        }
        desc->flags ^= (1ULL << VRING_PACKED_DESC_F_USED);
        dev->virtio_pci_dev.config.isr_cap.isr_status |= VIRTIO_PCI_ISR_QUEUE;
        return;
    }
    vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
}

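/* Per-queue callbacks wired into the generic virtqueue layer by virtq_init(). */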
static struct virtq_ops virtio_net_ops[VIRTIO_NET_VIRTQ_NUM] = {
    [VIRTQ_RX] = {
        .enable_vq = virtio_net_enable_vq_rx,
        .complete_request = virtio_net_complete_request_rx,
        .notify_used = virtio_net_notify_used_rx,
    },
    [VIRTQ_TX] = {
        .enable_vq = virtio_net_enable_vq_tx,
        .complete_request = virtio_net_complete_request_tx,
        .notify_used = virtio_net_notify_used_tx,
    },
};

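/* Open and configure the backing TAP device. The "tap%d" template lets the
 * kernel pick a free interface number in TUNSETIFF, IFF_NO_PI strips the
 * packet-information header so the fd carries raw Ethernet frames, and
 * O_NONBLOCK keeps the worker threads from stalling in read()/writev(). */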
bool virtio_net_init(struct virtio_net_dev *virtio_net_dev)
{
    memset(virtio_net_dev, 0x00, sizeof(struct virtio_net_dev));

    virtio_net_dev->tapfd = open("/dev/net/tun", O_RDWR);
    if (virtio_net_dev->tapfd < 0)
        return false;
    struct ifreq ifreq = {.ifr_flags = IFF_TAP | IFF_NO_PI};
    strncpy(ifreq.ifr_name, TAP_INTERFACE, sizeof(ifreq.ifr_name));
    if (ioctl(virtio_net_dev->tapfd, TUNSETIFF, &ifreq) < 0) {
        fprintf(stderr, "failed to allocate TAP device: %s\n", strerror(errno));
        close(virtio_net_dev->tapfd);
        return false;
    }
    /* Check fcntl() explicitly rather than inside assert(), whose side
     * effects would be dropped when compiled with NDEBUG. */
    int flags = fcntl(virtio_net_dev->tapfd, F_GETFL, 0);
    if (flags < 0 ||
        fcntl(virtio_net_dev->tapfd, F_SETFL, flags | O_NONBLOCK) < 0) {
        close(virtio_net_dev->tapfd);
        return false;
    }
    return true;
}

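/* Create the eventfds used for queue kicks and interrupt injection, register
 * the irqfd with the VM, and initialize both virtqueues with their ops. */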
static void virtio_net_setup(struct virtio_net_dev *dev)
{
    vm_t *v = container_of(dev, vm_t, virtio_net_dev);

    dev->enable = true;
    dev->irq_num = VIRTIO_NET_IRQ;
    dev->rx_ioeventfd = eventfd(0, EFD_CLOEXEC);
    dev->tx_ioeventfd = eventfd(0, EFD_CLOEXEC);
    dev->irqfd = eventfd(0, EFD_CLOEXEC);
    vm_irqfd_register(v, dev->irqfd, dev->irq_num, 0);
    for (int i = 0; i < VIRTIO_NET_VIRTQ_NUM; i++) {
        struct virtq_ops *ops = &virtio_net_ops[i];
        dev->vq[i].info.notify_off = i;
        virtq_init(&dev->vq[i], dev, ops);
    }
}

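/* Expose the device over virtio-pci: install the device-specific config and
 * PCI header, advertise the notify-offset multiplier and the VIRTIO_NET_F_MQ
 * feature, attach the virtqueues, and enable the device. */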
void virtio_net_init_pci(struct virtio_net_dev *virtio_net_dev,
                         struct pci *pci,
                         struct bus *io_bus,
                         struct bus *mmio_bus)
{
    struct virtio_pci_dev *dev = &virtio_net_dev->virtio_pci_dev;
    virtio_net_setup(virtio_net_dev);
    virtio_pci_init(dev, pci, io_bus, mmio_bus);
    virtio_pci_set_dev_cfg(dev, &virtio_net_dev->config,
                           sizeof(virtio_net_dev->config));
    virtio_pci_set_pci_hdr(dev, VIRTIO_PCI_DEVICE_ID_NET, VIRTIO_NET_PCI_CLASS,
                           virtio_net_dev->irq_num);
    dev->notify_cap->notify_off_multiplier = NOTIFY_OFFSET;
    virtio_pci_set_virtq(dev, virtio_net_dev->vq, VIRTIO_NET_VIRTQ_NUM);

    virtio_pci_add_feature(dev, VIRTIO_NET_F_MQ);
    virtio_pci_enable(dev);
}

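/* Tear down: signal both worker threads to stop, join them, then release the
 * PCI device and every file descriptor. */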
void virtio_net_exit(struct virtio_net_dev *dev)
{
    if (!dev->enable)
        return;
    __atomic_store_n(&thread_stop, true, __ATOMIC_RELAXED);
    pthread_join(dev->rx_thread, NULL);
    pthread_join(dev->tx_thread, NULL);
    virtio_pci_exit(&dev->virtio_pci_dev);
    close(dev->irqfd);
    close(dev->rx_ioeventfd);
    close(dev->tx_ioeventfd);
    close(dev->tapfd);
}