
oasis-root

Compiled tree of Oasis Linux, based on its own branch at <https://hacktivis.me/git/oasis/>.

	git clone https://anongit.hacktivis.me/git/oasis-root.git

io_uring.h (22346B)


/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
 * Header file for the io_uring interface.
 *
 * Copyright (C) 2019 Jens Axboe
 * Copyright (C) 2019 Christoph Hellwig
 */
#ifndef LINUX_IO_URING_H
#define LINUX_IO_URING_H

#include <linux/fs.h>
#include <linux/types.h>
/*
 * This file is shared with liburing, which has to autodetect whether
 * linux/time_types.h is available. If it is not, liburing can define
 * UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H to skip the include.
 */
#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
#include <linux/time_types.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * IO submission data structure (Submission Queue Entry)
 */
struct io_uring_sqe {
	__u8	opcode;		/* type of operation for this sqe */
	__u8	flags;		/* IOSQE_ flags */
	__u16	ioprio;		/* ioprio for the request */
	__s32	fd;		/* file descriptor to do IO on */
	union {
		__u64	off;	/* offset into file */
		__u64	addr2;
		struct {
			__u32	cmd_op;
			__u32	__pad1;
		};
	};
	union {
		__u64	addr;	/* pointer to buffer or iovecs */
		__u64	splice_off_in;
		struct {
			__u32	level;
			__u32	optname;
		};
	};
	__u32	len;		/* buffer size or number of iovecs */
	union {
		__kernel_rwf_t	rw_flags;
		__u32		fsync_flags;
		__u16		poll_events;	/* compatibility */
		__u32		poll32_events;	/* word-reversed for BE */
		__u32		sync_range_flags;
		__u32		msg_flags;
		__u32		timeout_flags;
		__u32		accept_flags;
		__u32		cancel_flags;
		__u32		open_flags;
		__u32		statx_flags;
		__u32		fadvise_advice;
		__u32		splice_flags;
		__u32		rename_flags;
		__u32		unlink_flags;
		__u32		hardlink_flags;
		__u32		xattr_flags;
		__u32		msg_ring_flags;
		__u32		uring_cmd_flags;
		__u32		waitid_flags;
		__u32		futex_flags;
		__u32		install_fd_flags;
		__u32		nop_flags;
	};
	__u64	user_data;	/* data to be passed back at completion time */
	/* pack this to avoid bogus arm OABI complaints */
	union {
		/* index into fixed buffers, if used */
		__u16	buf_index;
		/* for grouped buffer selection */
		__u16	buf_group;
	} __attribute__((packed));
	/* personality to use, if used */
	__u16	personality;
	union {
		__s32	splice_fd_in;
		__u32	file_index;
		__u32	optlen;
		struct {
			__u16	addr_len;
			__u16	__pad3[1];
		};
	};
	union {
		struct {
			__u64	addr3;
			__u64	__pad2[1];
		};
		__u64	optval;
		/*
		 * If the ring is initialized with IORING_SETUP_SQE128, then
		 * this field is used for 80 bytes of arbitrary command data
		 */
		__u8	cmd[0];
	};
};

/*
 * If sqe->file_index is set to this for opcodes that instantiate a new
 * direct descriptor (like openat/openat2/accept), then io_uring will allocate
 * an available direct descriptor instead of having the application pass one
 * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
 * if the space is full.
 */
#define IORING_FILE_INDEX_ALLOC		(~0U)
enum io_uring_sqe_flags_bit {
	IOSQE_FIXED_FILE_BIT,
	IOSQE_IO_DRAIN_BIT,
	IOSQE_IO_LINK_BIT,
	IOSQE_IO_HARDLINK_BIT,
	IOSQE_ASYNC_BIT,
	IOSQE_BUFFER_SELECT_BIT,
	IOSQE_CQE_SKIP_SUCCESS_BIT,
};

/*
 * sqe->flags
 */
/* use fixed fileset */
#define IOSQE_FIXED_FILE	(1U << IOSQE_FIXED_FILE_BIT)
/* issue after inflight IO */
#define IOSQE_IO_DRAIN		(1U << IOSQE_IO_DRAIN_BIT)
/* links next sqe */
#define IOSQE_IO_LINK		(1U << IOSQE_IO_LINK_BIT)
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK	(1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
#define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
/* select buffer from sqe->buf_group */
#define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
/* don't post CQE if request succeeded */
#define IOSQE_CQE_SKIP_SUCCESS	(1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
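
/*
 * Illustrative sketch, not part of the UAPI header: chaining two
 * already-prepared SQEs so the second only starts once the first completes
 * successfully. The IO_URING_H_EXAMPLES guard is a hypothetical macro used
 * here so the sketch is never compiled by accident.
 */
#ifdef IO_URING_H_EXAMPLES
static inline void example_link_pair(struct io_uring_sqe *first,
				     struct io_uring_sqe *second)
{
	/* IOSQE_IO_LINK marks `first` as (part of) a chain head... */
	first->flags |= IOSQE_IO_LINK;
	/* ...and is left clear on `second`, which terminates the chain.
	 * If `first` fails, `second` completes with -ECANCELED. */
	(void)second;
}
#endif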
/*
 * io_uring_setup() flags
 */
#define IORING_SETUP_IOPOLL	(1U << 0)	/* io_context is polled */
#define IORING_SETUP_SQPOLL	(1U << 1)	/* SQ poll thread */
#define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
#define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
#define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
#define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
#define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
/*
 * Cooperative task running. When requests complete, they often require
 * forcing the submitter to transition to the kernel to complete. If this
 * flag is set, work will be done when the task transitions anyway, rather
 * than force an inter-processor interrupt reschedule. This avoids interrupting
 * a task running in userspace, and saves an IPI.
 */
#define IORING_SETUP_COOP_TASKRUN	(1U << 8)
/*
 * If COOP_TASKRUN is set, get notified if task work is available for
 * running and a kernel transition would be needed to run it. This sets
 * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
 */
#define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
#define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
#define IORING_SETUP_CQE32		(1U << 11) /* CQEs are 32 byte */
/*
 * Only one task is allowed to submit requests
 */
#define IORING_SETUP_SINGLE_ISSUER	(1U << 12)

/*
 * Defer running task work to get events.
 * Rather than running bits of task work whenever the task transitions
 * try to do it just before it is needed.
 */
#define IORING_SETUP_DEFER_TASKRUN	(1U << 13)

/*
 * Application provides the memory for the rings
 */
#define IORING_SETUP_NO_MMAP		(1U << 14)

/*
 * Register the ring fd in itself for use with
 * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
 * than an fd.
 */
#define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)

/*
 * Removes indirection through the SQ index array.
 */
#define IORING_SETUP_NO_SQARRAY		(1U << 16)
enum io_uring_op {
	IORING_OP_NOP,
	IORING_OP_READV,
	IORING_OP_WRITEV,
	IORING_OP_FSYNC,
	IORING_OP_READ_FIXED,
	IORING_OP_WRITE_FIXED,
	IORING_OP_POLL_ADD,
	IORING_OP_POLL_REMOVE,
	IORING_OP_SYNC_FILE_RANGE,
	IORING_OP_SENDMSG,
	IORING_OP_RECVMSG,
	IORING_OP_TIMEOUT,
	IORING_OP_TIMEOUT_REMOVE,
	IORING_OP_ACCEPT,
	IORING_OP_ASYNC_CANCEL,
	IORING_OP_LINK_TIMEOUT,
	IORING_OP_CONNECT,
	IORING_OP_FALLOCATE,
	IORING_OP_OPENAT,
	IORING_OP_CLOSE,
	IORING_OP_FILES_UPDATE,
	IORING_OP_STATX,
	IORING_OP_READ,
	IORING_OP_WRITE,
	IORING_OP_FADVISE,
	IORING_OP_MADVISE,
	IORING_OP_SEND,
	IORING_OP_RECV,
	IORING_OP_OPENAT2,
	IORING_OP_EPOLL_CTL,
	IORING_OP_SPLICE,
	IORING_OP_PROVIDE_BUFFERS,
	IORING_OP_REMOVE_BUFFERS,
	IORING_OP_TEE,
	IORING_OP_SHUTDOWN,
	IORING_OP_RENAMEAT,
	IORING_OP_UNLINKAT,
	IORING_OP_MKDIRAT,
	IORING_OP_SYMLINKAT,
	IORING_OP_LINKAT,
	IORING_OP_MSG_RING,
	IORING_OP_FSETXATTR,
	IORING_OP_SETXATTR,
	IORING_OP_FGETXATTR,
	IORING_OP_GETXATTR,
	IORING_OP_SOCKET,
	IORING_OP_URING_CMD,
	IORING_OP_SEND_ZC,
	IORING_OP_SENDMSG_ZC,
	IORING_OP_READ_MULTISHOT,
	IORING_OP_WAITID,
	IORING_OP_FUTEX_WAIT,
	IORING_OP_FUTEX_WAKE,
	IORING_OP_FUTEX_WAITV,
	IORING_OP_FIXED_FD_INSTALL,
	IORING_OP_FTRUNCATE,
	IORING_OP_BIND,
	IORING_OP_LISTEN,

	/* this goes last, obviously */
	IORING_OP_LAST,
};
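
/*
 * Illustrative sketch, not part of the UAPI header: populating an SQE by
 * hand for a read at a given file offset, roughly what liburing's
 * io_uring_prep_read() does. `sqe` is assumed to point at a free entry in
 * the mmap'd SQE array; the 0xcafe user_data tag is an arbitrary example.
 * Guarded by the hypothetical IO_URING_H_EXAMPLES macro.
 */
#ifdef IO_URING_H_EXAMPLES
#include <string.h>

static inline void example_prep_read(struct io_uring_sqe *sqe, int fd,
				     void *buf, unsigned int len, __u64 off)
{
	memset(sqe, 0, sizeof(*sqe));	/* unused fields must be zeroed */
	sqe->opcode	= IORING_OP_READ;
	sqe->fd		= fd;
	sqe->off	= off;			/* file offset to read at */
	sqe->addr	= (unsigned long)buf;	/* destination buffer */
	sqe->len	= len;			/* buffer size in bytes */
	sqe->user_data	= 0xcafe;	/* echoed back in cqe->user_data */
}
#endif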
/*
 * sqe->uring_cmd_flags		top 8bits aren't available for userspace
 * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
 *				along with setting sqe->buf_index.
 */
#define IORING_URING_CMD_FIXED	(1U << 0)
#define IORING_URING_CMD_MASK	IORING_URING_CMD_FIXED

/*
 * sqe->fsync_flags
 */
#define IORING_FSYNC_DATASYNC	(1U << 0)

/*
 * sqe->timeout_flags
 */
#define IORING_TIMEOUT_ABS		(1U << 0)
#define IORING_TIMEOUT_UPDATE		(1U << 1)
#define IORING_TIMEOUT_BOOTTIME		(1U << 2)
#define IORING_TIMEOUT_REALTIME		(1U << 3)
#define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
#define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
#define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)

/*
 * sqe->splice_flags
 * extends splice(2) flags
 */
#define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */

/*
 * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
 * command flags for POLL_ADD are stored in sqe->len.
 *
 * IORING_POLL_ADD_MULTI	Multishot poll. Sets IORING_CQE_F_MORE if
 *				the poll handler will continue to report
 *				CQEs on behalf of the same SQE.
 *
 * IORING_POLL_UPDATE		Update existing poll request, matching
 *				sqe->addr as the old user_data field.
 *
 * IORING_POLL_LEVEL		Level triggered poll.
 */
#define IORING_POLL_ADD_MULTI		(1U << 0)
#define IORING_POLL_UPDATE_EVENTS	(1U << 1)
#define IORING_POLL_UPDATE_USER_DATA	(1U << 2)
#define IORING_POLL_ADD_LEVEL		(1U << 3)

/*
 * ASYNC_CANCEL flags.
 *
 * IORING_ASYNC_CANCEL_ALL	Cancel all requests that match the given key
 * IORING_ASYNC_CANCEL_FD	Key off 'fd' for cancelation rather than the
 *				request 'user_data'
 * IORING_ASYNC_CANCEL_ANY	Match any request
 * IORING_ASYNC_CANCEL_FD_FIXED	'fd' passed in is a fixed descriptor
 * IORING_ASYNC_CANCEL_USERDATA	Match on user_data, default for no other key
 * IORING_ASYNC_CANCEL_OP	Match request based on opcode
 */
#define IORING_ASYNC_CANCEL_ALL		(1U << 0)
#define IORING_ASYNC_CANCEL_FD		(1U << 1)
#define IORING_ASYNC_CANCEL_ANY		(1U << 2)
#define IORING_ASYNC_CANCEL_FD_FIXED	(1U << 3)
#define IORING_ASYNC_CANCEL_USERDATA	(1U << 4)
#define IORING_ASYNC_CANCEL_OP		(1U << 5)

/*
 * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
 *
 * IORING_RECVSEND_POLL_FIRST	If set, instead of first attempting to send
 *				or receive and arm poll if that yields an
 *				-EAGAIN result, arm poll upfront and skip
 *				the initial transfer attempt.
 *
 * IORING_RECV_MULTISHOT	Multishot recv. Sets IORING_CQE_F_MORE if
 *				the handler will continue to report
 *				CQEs on behalf of the same SQE.
 *
 * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
 *				the buf_index field.
 *
 * IORING_SEND_ZC_REPORT_USAGE
 *				If set, SEND[MSG]_ZC should report
 *				the zerocopy usage in cqe.res
 *				for the IORING_CQE_F_NOTIF cqe.
 *				0 is reported if zerocopy was actually possible.
 *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
 *				(at least partially).
 *
 * IORING_RECVSEND_BUNDLE	Used with IOSQE_BUFFER_SELECT. If set, send or
 *				recv will grab as many buffers from the buffer
 *				group ID given and send them all. The completion
 *				result will be the number of buffers sent, with
 *				the starting buffer ID in cqe->flags as per
 *				usual for provided buffer usage. The buffers
 *				will be contiguous from the starting buffer ID.
 */
#define IORING_RECVSEND_POLL_FIRST	(1U << 0)
#define IORING_RECV_MULTISHOT		(1U << 1)
#define IORING_RECVSEND_FIXED_BUF	(1U << 2)
#define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
#define IORING_RECVSEND_BUNDLE		(1U << 4)

/*
 * cqe.res for IORING_CQE_F_NOTIF if
 * IORING_SEND_ZC_REPORT_USAGE was requested
 *
 * It should be treated as a flag, all other
 * bits of cqe.res should be treated as reserved!
 */
#define IORING_NOTIF_USAGE_ZC_COPIED	(1U << 31)

/*
 * accept flags stored in sqe->ioprio
 */
#define IORING_ACCEPT_MULTISHOT		(1U << 0)
#define IORING_ACCEPT_DONTWAIT		(1U << 1)
#define IORING_ACCEPT_POLL_FIRST	(1U << 2)

/*
 * IORING_OP_MSG_RING command types, stored in sqe->addr
 */
enum io_uring_msg_ring_flags {
	IORING_MSG_DATA,	/* pass sqe->len as 'res' and off as user_data */
	IORING_MSG_SEND_FD,	/* send a registered fd to another ring */
};

/*
 * IORING_OP_MSG_RING flags (sqe->msg_ring_flags)
 *
 * IORING_MSG_RING_CQE_SKIP	Don't post a CQE to the target ring. Not
 *				applicable for IORING_MSG_DATA, obviously.
 */
#define IORING_MSG_RING_CQE_SKIP	(1U << 0)
/* Pass through the flags from sqe->file_index to cqe->flags */
#define IORING_MSG_RING_FLAGS_PASS	(1U << 1)

/*
 * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
 *
 * IORING_FIXED_FD_NO_CLOEXEC	Don't mark the fd as O_CLOEXEC
 */
#define IORING_FIXED_FD_NO_CLOEXEC	(1U << 0)

/*
 * IORING_OP_NOP flags (sqe->nop_flags)
 *
 * IORING_NOP_INJECT_RESULT	Inject result from sqe->result
 */
#define IORING_NOP_INJECT_RESULT	(1U << 0)

/*
 * IO completion data structure (Completion Queue Entry)
 */
struct io_uring_cqe {
	__u64	user_data;	/* sqe->user_data value passed back */
	__s32	res;		/* result code for this event */
	__u32	flags;

	/*
	 * If the ring is initialized with IORING_SETUP_CQE32, then this field
	 * contains 16-bytes of padding, doubling the size of the CQE.
	 */
	__u64	big_cqe[];
};

/*
 * cqe->flags
 *
 * IORING_CQE_F_BUFFER		If set, the upper 16 bits are the buffer ID
 * IORING_CQE_F_MORE		If set, parent SQE will generate more CQE entries
 * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
 * IORING_CQE_F_NOTIF		Set for notification CQEs. Can be used to
 *				distinguish them from sends.
 * IORING_CQE_F_BUF_MORE	If set, the buffer ID set in the completion will
 *				get more completions. In other words, the buffer
 *				is being partially consumed, and will be used by
 *				the kernel for more completions. This is only set
 *				for buffers used via incremental buffer
 *				consumption, as provided by a ring buffer setup
 *				with IOU_PBUF_RING_INC. For any other provided
 *				buffer type, a completion that passes a buffer
 *				back automatically returns it to the application.
 */
#define IORING_CQE_F_BUFFER		(1U << 0)
#define IORING_CQE_F_MORE		(1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)
#define IORING_CQE_F_NOTIF		(1U << 3)
#define IORING_CQE_F_BUF_MORE		(1U << 4)

#define IORING_CQE_BUFFER_SHIFT		16
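
/*
 * Illustrative sketch, not part of the UAPI header: decoding a completed
 * CQE. The buffer-ID extraction via IORING_CQE_BUFFER_SHIFT only applies
 * when IORING_CQE_F_BUFFER is set (provided-buffer requests). Guarded by
 * the hypothetical IO_URING_H_EXAMPLES macro.
 */
#ifdef IO_URING_H_EXAMPLES
static inline int example_handle_cqe(const struct io_uring_cqe *cqe)
{
	if (cqe->res < 0)
		return cqe->res;	/* negated errno, e.g. -EAGAIN */

	if (cqe->flags & IORING_CQE_F_BUFFER) {
		/* upper 16 bits of flags carry the provided-buffer ID */
		unsigned int bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
		(void)bid;	/* a real caller would recycle this buffer */
	}
	/* IORING_CQE_F_MORE means the same SQE will post further CQEs */
	return cqe->res;	/* e.g. number of bytes transferred */
}
#endif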
/*
 * Magic offsets for the application to mmap the data it needs
 */
#define IORING_OFF_SQ_RING		0ULL
#define IORING_OFF_CQ_RING		0x8000000ULL
#define IORING_OFF_SQES			0x10000000ULL
#define IORING_OFF_PBUF_RING		0x80000000ULL
#define IORING_OFF_PBUF_SHIFT		16
#define IORING_OFF_MMAP_MASK		0xf8000000ULL

/*
 * Filled with the offset for mmap(2)
 */
struct io_sqring_offsets {
	__u32 head;
	__u32 tail;
	__u32 ring_mask;
	__u32 ring_entries;
	__u32 flags;
	__u32 dropped;
	__u32 array;
	__u32 resv1;
	__u64 user_addr;
};

/*
 * sq_ring->flags
 */
#define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
#define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* CQ ring has overflowed */
#define IORING_SQ_TASKRUN	(1U << 2) /* task should enter the kernel */

struct io_cqring_offsets {
	__u32 head;
	__u32 tail;
	__u32 ring_mask;
	__u32 ring_entries;
	__u32 overflow;
	__u32 cqes;
	__u32 flags;
	__u32 resv1;
	__u64 user_addr;
};

/*
 * cq_ring->flags
 */
/* disable eventfd notifications */
#define IORING_CQ_EVENTFD_DISABLED	(1U << 0)

/*
 * io_uring_enter(2) flags
 */
#define IORING_ENTER_GETEVENTS		(1U << 0)
#define IORING_ENTER_SQ_WAKEUP		(1U << 1)
#define IORING_ENTER_SQ_WAIT		(1U << 2)
#define IORING_ENTER_EXT_ARG		(1U << 3)
#define IORING_ENTER_REGISTERED_RING	(1U << 4)
#define IORING_ENTER_ABS_TIMER		(1U << 5)

/*
 * Passed in for io_uring_setup(2). Copied back with updated info on success
 */
struct io_uring_params {
	__u32 sq_entries;
	__u32 cq_entries;
	__u32 flags;
	__u32 sq_thread_cpu;
	__u32 sq_thread_idle;
	__u32 features;
	__u32 wq_fd;
	__u32 resv[3];
	struct io_sqring_offsets sq_off;
	struct io_cqring_offsets cq_off;
};
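
/*
 * Illustrative sketch, not part of the UAPI header: creating a ring with
 * the raw io_uring_setup(2) syscall and mapping the SQ ring using the magic
 * offsets above. Assumes a Linux libc where <sys/syscall.h> provides
 * __NR_io_uring_setup; error handling is reduced to returning -1. Guarded
 * by the hypothetical IO_URING_H_EXAMPLES macro.
 */
#ifdef IO_URING_H_EXAMPLES
#include <stddef.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static int example_setup_ring(unsigned int entries, struct io_uring_params *p,
			      void **sq_ring_out)
{
	int fd;
	size_t sq_sz;
	void *sq_ring;

	memset(p, 0, sizeof(*p));
	p->flags = IORING_SETUP_CLAMP;	/* clamp oversized ring requests */

	fd = (int)syscall(__NR_io_uring_setup, entries, p);
	if (fd < 0)
		return -1;

	/* SQ ring size: offset of the index array plus one __u32 per entry */
	sq_sz = p->sq_off.array + p->sq_entries * sizeof(__u32);
	sq_ring = mmap(NULL, sq_sz, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (sq_ring == MAP_FAILED) {
		close(fd);
		return -1;
	}
	*sq_ring_out = sq_ring;
	return fd;	/* the SQE array itself is mapped at IORING_OFF_SQES */
}
#endif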
/*
 * io_uring_params->features flags
 */
#define IORING_FEAT_SINGLE_MMAP		(1U << 0)
#define IORING_FEAT_NODROP		(1U << 1)
#define IORING_FEAT_SUBMIT_STABLE	(1U << 2)
#define IORING_FEAT_RW_CUR_POS		(1U << 3)
#define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
#define IORING_FEAT_FAST_POLL		(1U << 5)
#define IORING_FEAT_POLL_32BITS		(1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
#define IORING_FEAT_EXT_ARG		(1U << 8)
#define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
#define IORING_FEAT_RSRC_TAGS		(1U << 10)
#define IORING_FEAT_CQE_SKIP		(1U << 11)
#define IORING_FEAT_LINKED_FILE		(1U << 12)
#define IORING_FEAT_REG_REG_RING	(1U << 13)
#define IORING_FEAT_RECVSEND_BUNDLE	(1U << 14)
#define IORING_FEAT_MIN_TIMEOUT		(1U << 15)

/*
 * io_uring_register(2) opcodes and arguments
 */
enum io_uring_register_op {
	IORING_REGISTER_BUFFERS			= 0,
	IORING_UNREGISTER_BUFFERS		= 1,
	IORING_REGISTER_FILES			= 2,
	IORING_UNREGISTER_FILES			= 3,
	IORING_REGISTER_EVENTFD			= 4,
	IORING_UNREGISTER_EVENTFD		= 5,
	IORING_REGISTER_FILES_UPDATE		= 6,
	IORING_REGISTER_EVENTFD_ASYNC		= 7,
	IORING_REGISTER_PROBE			= 8,
	IORING_REGISTER_PERSONALITY		= 9,
	IORING_UNREGISTER_PERSONALITY		= 10,
	IORING_REGISTER_RESTRICTIONS		= 11,
	IORING_REGISTER_ENABLE_RINGS		= 12,

	/* extended with tagging */
	IORING_REGISTER_FILES2			= 13,
	IORING_REGISTER_FILES_UPDATE2		= 14,
	IORING_REGISTER_BUFFERS2		= 15,
	IORING_REGISTER_BUFFERS_UPDATE		= 16,

	/* set/clear io-wq thread affinities */
	IORING_REGISTER_IOWQ_AFF		= 17,
	IORING_UNREGISTER_IOWQ_AFF		= 18,

	/* set/get max number of io-wq workers */
	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,

	/* register/unregister io_uring fd with the ring */
	IORING_REGISTER_RING_FDS		= 20,
	IORING_UNREGISTER_RING_FDS		= 21,

	/* register ring based provide buffer group */
	IORING_REGISTER_PBUF_RING		= 22,
	IORING_UNREGISTER_PBUF_RING		= 23,

	/* sync cancelation API */
	IORING_REGISTER_SYNC_CANCEL		= 24,

	/* register a range of fixed file slots for automatic slot allocation */
	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,

	/* return status information for a buffer group */
	IORING_REGISTER_PBUF_STATUS		= 26,

	/* set/clear busy poll settings */
	IORING_REGISTER_NAPI			= 27,
	IORING_UNREGISTER_NAPI			= 28,

	IORING_REGISTER_CLOCK			= 29,

	/* clone registered buffers from source ring to current ring */
	IORING_REGISTER_CLONE_BUFFERS		= 30,

	/* this goes last */
	IORING_REGISTER_LAST,

	/* flag added to the opcode to use a registered ring fd */
	IORING_REGISTER_USE_REGISTERED_RING	= 1U << 31
};

/* io-wq worker categories */
enum io_wq_type {
	IO_WQ_BOUND,
	IO_WQ_UNBOUND,
};

/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update {
	__u32 offset;
	__u32 resv;
	__aligned_u64 /* __s32 * */ fds;
};

/*
 * Register a fully sparse file space, rather than pass in an array of all
 * -1 file descriptors.
 */
#define IORING_RSRC_REGISTER_SPARSE	(1U << 0)

struct io_uring_rsrc_register {
	__u32 nr;
	__u32 flags;
	__u64 resv2;
	__aligned_u64 data;
	__aligned_u64 tags;
};

struct io_uring_rsrc_update {
	__u32 offset;
	__u32 resv;
	__aligned_u64 data;
};

struct io_uring_rsrc_update2 {
	__u32 offset;
	__u32 resv;
	__aligned_u64 data;
	__aligned_u64 tags;
	__u32 nr;
	__u32 resv2;
};

/* Skip updating fd indexes set to this value in the fd table */
#define IORING_REGISTER_FILES_SKIP	(-2)

#define IO_URING_OP_SUPPORTED	(1U << 0)

struct io_uring_probe_op {
	__u8 op;
	__u8 resv;
	__u16 flags;	/* IO_URING_OP_* flags */
	__u32 resv2;
};

struct io_uring_probe {
	__u8 last_op;	/* last opcode supported */
	__u8 ops_len;	/* length of ops[] array below */
	__u16 resv;
	__u32 resv2[3];
	struct io_uring_probe_op ops[];
};
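
/*
 * Illustrative sketch, not part of the UAPI header: asking the kernel which
 * opcodes it supports via IORING_REGISTER_PROBE. Assumes a Linux libc with
 * __NR_io_uring_register in <sys/syscall.h>; `ring_fd` is an existing ring.
 * Guarded by the hypothetical IO_URING_H_EXAMPLES macro.
 */
#ifdef IO_URING_H_EXAMPLES
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

static int example_op_supported(int ring_fd, __u8 op)
{
	/* room for one io_uring_probe_op per possible opcode */
	size_t len = sizeof(struct io_uring_probe) +
		     256 * sizeof(struct io_uring_probe_op);
	struct io_uring_probe *probe = calloc(1, len);
	int ret, supported = 0;

	if (!probe)
		return -1;
	ret = (int)syscall(__NR_io_uring_register, ring_fd,
			   IORING_REGISTER_PROBE, probe, 256);
	if (ret >= 0 && op <= probe->last_op)
		supported = !!(probe->ops[op].flags & IO_URING_OP_SUPPORTED);
	free(probe);
	return ret < 0 ? -1 : supported;
}
#endif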
struct io_uring_restriction {
	__u16 opcode;
	union {
		__u8 register_op;	/* IORING_RESTRICTION_REGISTER_OP */
		__u8 sqe_op;		/* IORING_RESTRICTION_SQE_OP */
		__u8 sqe_flags;		/* IORING_RESTRICTION_SQE_FLAGS_* */
	};
	__u8 resv;
	__u32 resv2[3];
};

struct io_uring_clock_register {
	__u32 clockid;
	__u32 __resv[3];
};

enum {
	IORING_REGISTER_SRC_REGISTERED = 1,
};

struct io_uring_clone_buffers {
	__u32 src_fd;
	__u32 flags;
	__u32 pad[6];
};

struct io_uring_buf {
	__u64 addr;
	__u32 len;
	__u16 bid;
	__u16 resv;
};

struct io_uring_buf_ring {
	union {
		/*
		 * To avoid spilling into more pages than we need to, the
		 * ring tail is overlaid with the io_uring_buf->resv field.
		 */
		struct {
			__u64 resv1;
			__u32 resv2;
			__u16 resv3;
			__u16 tail;
		};
		__DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs);
	};
};

/*
 * Flags for IORING_REGISTER_PBUF_RING.
 *
 * IOU_PBUF_RING_MMAP:	If set, kernel will allocate the memory for the ring.
 *			The application must not set a ring_addr in struct
 *			io_uring_buf_reg, instead it must subsequently call
 *			mmap(2) with the offset set as:
 *			IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
 *			to get a virtual mapping for the ring.
 * IOU_PBUF_RING_INC:	If set, buffers consumed from this buffer ring can be
 *			consumed incrementally. Normally one (or more) buffers
 *			are fully consumed. With incremental consumption, it's
 *			feasible to register big ranges of buffers, and each
 *			use of it will consume only as much as it needs. This
 *			requires that both the kernel and application keep
 *			track of where the current read/recv index is at.
 */
enum io_uring_register_pbuf_ring_flags {
	IOU_PBUF_RING_MMAP	= 1,
	IOU_PBUF_RING_INC	= 2,
};

/* argument for IORING_(UN)REGISTER_PBUF_RING */
struct io_uring_buf_reg {
	__u64	ring_addr;
	__u32	ring_entries;
	__u16	bgid;
	__u16	flags;
	__u64	resv[3];
};
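
/*
 * Illustrative sketch, not part of the UAPI header: registering a
 * kernel-allocated provided-buffer ring for a buffer group and mapping it
 * with the IORING_OFF_PBUF_RING offset scheme described above. Assumes
 * __NR_io_uring_register from <sys/syscall.h>; `ring_fd` is an open ring.
 * Guarded by the hypothetical IO_URING_H_EXAMPLES macro.
 */
#ifdef IO_URING_H_EXAMPLES
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static struct io_uring_buf_ring *example_setup_pbuf_ring(int ring_fd,
							 __u16 bgid,
							 __u32 entries)
{
	struct io_uring_buf_reg reg;
	void *ring;
	__u64 off;

	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = entries;	/* must be a power of two */
	reg.bgid	 = bgid;
	reg.flags	 = IOU_PBUF_RING_MMAP;	/* kernel allocates memory */

	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PBUF_RING, &reg, 1) < 0)
		return NULL;

	/* mmap offset encodes the buffer group ID, per the comment above */
	off = IORING_OFF_PBUF_RING | ((__u64)bgid << IORING_OFF_PBUF_SHIFT);
	ring = mmap(NULL, entries * sizeof(struct io_uring_buf),
		    PROT_READ | PROT_WRITE, MAP_SHARED, ring_fd, off);
	return ring == MAP_FAILED ? NULL : (struct io_uring_buf_ring *)ring;
}
#endif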
/* argument for IORING_REGISTER_PBUF_STATUS */
struct io_uring_buf_status {
	__u32	buf_group;	/* input */
	__u32	head;		/* output */
	__u32	resv[8];
};

/* argument for IORING_(UN)REGISTER_NAPI */
struct io_uring_napi {
	__u32	busy_poll_to;
	__u8	prefer_busy_poll;
	__u8	pad[3];
	__u64	resv;
};

/*
 * io_uring_restriction->opcode values
 */
enum io_uring_register_restriction_op {
	/* Allow an io_uring_register(2) opcode */
	IORING_RESTRICTION_REGISTER_OP		= 0,

	/* Allow an sqe opcode */
	IORING_RESTRICTION_SQE_OP		= 1,

	/* Allow sqe flags */
	IORING_RESTRICTION_SQE_FLAGS_ALLOWED	= 2,

	/* Require sqe flags (these flags must be set on each submission) */
	IORING_RESTRICTION_SQE_FLAGS_REQUIRED	= 3,

	IORING_RESTRICTION_LAST
};

struct io_uring_getevents_arg {
	__u64	sigmask;
	__u32	sigmask_sz;
	__u32	min_wait_usec;
	__u64	ts;
};
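
/*
 * Illustrative sketch, not part of the UAPI header: waiting for completions
 * with a timeout via IORING_ENTER_EXT_ARG. With that flag set, the last
 * syscall argument carries sizeof(struct io_uring_getevents_arg) instead of
 * a sigmask size. Assumes __NR_io_uring_enter from <sys/syscall.h>. Guarded
 * by the hypothetical IO_URING_H_EXAMPLES macro.
 */
#ifdef IO_URING_H_EXAMPLES
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int example_wait_cqe_timeout(int ring_fd, unsigned int min_complete,
				    struct __kernel_timespec *ts)
{
	struct io_uring_getevents_arg arg;

	memset(&arg, 0, sizeof(arg));
	arg.ts = (__u64)(unsigned long)ts;	/* NULL means wait forever */

	return (int)syscall(__NR_io_uring_enter, ring_fd, 0, min_complete,
			    IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
			    &arg, sizeof(arg));
}
#endif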
/*
 * Argument for IORING_REGISTER_SYNC_CANCEL
 */
struct io_uring_sync_cancel_reg {
	__u64	addr;
	__s32	fd;
	__u32	flags;
	struct __kernel_timespec timeout;
	__u8	opcode;
	__u8	pad[7];
	__u64	pad2[3];
};

/*
 * Argument for IORING_REGISTER_FILE_ALLOC_RANGE
 * The range is specified as [off, off + len)
 */
struct io_uring_file_index_range {
	__u32	off;
	__u32	len;
	__u64	resv;
};

struct io_uring_recvmsg_out {
	__u32 namelen;
	__u32 controllen;
	__u32 payloadlen;
	__u32 flags;
};

/*
 * Argument for IORING_OP_URING_CMD when file is a socket
 */
enum io_uring_socket_op {
	SOCKET_URING_OP_SIOCINQ		= 0,
	SOCKET_URING_OP_SIOCOUTQ,
	SOCKET_URING_OP_GETSOCKOPT,
	SOCKET_URING_OP_SETSOCKOPT,
};

#ifdef __cplusplus
}
#endif

#endif