Newer
Older
extern struct sk_buff *sock_alloc_send_skb(struct sock *sk,
unsigned long size,
int noblock,
int *errcode);
extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
unsigned long header_len,
unsigned long data_len,
int noblock,
int *errcode);
extern void *sock_kmalloc(struct sock *sk, int size,
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
extern void sk_send_sigurg(struct sock *sk);
/*
* Functions to fill in entries in struct proto_ops when a protocol
* does not implement a particular function.
*/
extern int sock_no_bind(struct socket *,
struct sockaddr *, int);
extern int sock_no_connect(struct socket *,
struct sockaddr *, int, int);
extern int sock_no_socketpair(struct socket *,
struct socket *);
extern int sock_no_accept(struct socket *,
struct socket *, int);
extern int sock_no_getname(struct socket *,
struct sockaddr *, int *, int);
extern unsigned int sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
extern int sock_no_ioctl(struct socket *, unsigned int,
unsigned long);
extern int sock_no_listen(struct socket *, int);
extern int sock_no_shutdown(struct socket *, int);
extern int sock_no_getsockopt(struct socket *, int , int,
char __user *, int __user *);
extern int sock_no_setsockopt(struct socket *, int, int,
char __user *, unsigned int);
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
extern int sock_no_sendmsg(struct kiocb *, struct socket *,
struct msghdr *, size_t);
extern int sock_no_recvmsg(struct kiocb *, struct socket *,
struct msghdr *, size_t, int);
extern int sock_no_mmap(struct file *file,
struct socket *sock,
struct vm_area_struct *vma);
extern ssize_t sock_no_sendpage(struct socket *sock,
struct page *page,
int offset, size_t size,
int flags);
/*
* Functions to fill in entries in struct proto_ops when a protocol
* uses the inet style.
*/
extern int sock_common_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen);
extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags);
extern int sock_common_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen);
extern int compat_sock_common_getsockopt(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
extern int compat_sock_common_setsockopt(struct socket *sock, int level,
int optname, char __user *optval, unsigned int optlen);
extern void sk_common_release(struct sock *sk);
/*
* Default socket callbacks and setup code
*/
/* Initialise core socket variables */
extern void sock_init_data(struct socket *sock, struct sock *sk);
* sk_filter_release - release a socket filter
* @fp: filter to remove
*
* Remove a filter from a socket and release its resources.
*/
static inline void sk_filter_release(struct sk_filter *fp)
{
if (atomic_dec_and_test(&fp->refcnt))
kfree(fp);
}
static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
unsigned int size = sk_filter_len(fp);
atomic_sub(size, &sk->sk_omem_alloc);
sk_filter_release(fp);
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
}
static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
atomic_inc(&fp->refcnt);
atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc);
}
/*
* Socket reference counting postulates.
*
* * Each user of socket SHOULD hold a reference count.
* * Each access point to socket (an hash table bucket, reference from a list,
* running timer, skb in flight MUST hold a reference count.
* * When reference count hits 0, it means it will never increase back.
* * When reference count hits 0, it means that no references from
* outside exist to this socket and current process on current CPU
* is last user and may/should destroy this socket.
* * sk_free is called from any context: process, BH, IRQ. When
* it is called, socket has no references from outside -> sk_free
* may release descendant resources allocated by the socket, but
* to the time when it is called, socket is NOT referenced by any
* hash tables, lists etc.
* * Packets, delivered from outside (from network or from another process)
* and enqueued on receive/error queues SHOULD NOT grab reference count,
* when they sit in queue. Otherwise, packets will leak to hole, when
* socket is looked up by one cpu and unhasing is made by another CPU.
* It is true for udp/raw, netlink (leak to receive and error queues), tcp
* (leak to backlog). Packet socket does all the processing inside
* BR_NETPROTO_LOCK, so that it has not this race condition. UNIX sockets
* use separate SMP lock, so that they are prone too.
*/
/* Ungrab socket and destroy it, if it was the last reference. */
static inline void sock_put(struct sock *sk)
{
if (atomic_dec_and_test(&sk->sk_refcnt))
sk_free(sk);
}
extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested);
static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
{
sk->sk_tx_queue_mapping = tx_queue;
}
static inline void sk_tx_queue_clear(struct sock *sk)
{
sk->sk_tx_queue_mapping = -1;
}
static inline int sk_tx_queue_get(const struct sock *sk)
{
return sk->sk_tx_queue_mapping;
}
static inline bool sk_tx_queue_recorded(const struct sock *sk)
{
return (sk && sk->sk_tx_queue_mapping >= 0);
}
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
/* Detach socket from process context.
* Announce socket dead, detach it from wait queue and inode.
* Note that parent inode held reference count on this struct sock,
* we do not release it in this function, because protocol
* probably wants some additional cleanups or even continuing
* to work with this socket (TCP).
*/
static inline void sock_orphan(struct sock *sk)
{
write_lock_bh(&sk->sk_callback_lock);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_sleep = NULL;
write_unlock_bh(&sk->sk_callback_lock);
}
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
write_lock_bh(&sk->sk_callback_lock);
sk->sk_sleep = &parent->wait;
parent->sk = sk;
security_sock_graft(sk, parent);
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
write_unlock_bh(&sk->sk_callback_lock);
}
extern int sock_i_uid(struct sock *sk);
extern unsigned long sock_i_ino(struct sock *sk);
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
return sk->sk_dst_cache;
}
static inline struct dst_entry *
sk_dst_get(struct sock *sk)
{
struct dst_entry *dst;
read_lock(&sk->sk_dst_lock);
dst = sk->sk_dst_cache;
if (dst)
dst_hold(dst);
read_unlock(&sk->sk_dst_lock);
return dst;
}
static inline void
__sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old_dst;
old_dst = sk->sk_dst_cache;
sk->sk_dst_cache = dst;
dst_release(old_dst);
}
static inline void
sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
write_lock(&sk->sk_dst_lock);
__sk_dst_set(sk, dst);
write_unlock(&sk->sk_dst_lock);
}
static inline void
__sk_dst_reset(struct sock *sk)
{
struct dst_entry *old_dst;
old_dst = sk->sk_dst_cache;
sk->sk_dst_cache = NULL;
dst_release(old_dst);
}
static inline void
sk_dst_reset(struct sock *sk)
{
write_lock(&sk->sk_dst_lock);
__sk_dst_reset(sk);
write_unlock(&sk->sk_dst_lock);
}
extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
static inline int sk_can_gso(const struct sock *sk)
{
return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
}
extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
static inline int skb_copy_to_page(struct sock *sk, char __user *from,
struct sk_buff *skb, struct page *page,
int off, int copy)
{
if (skb->ip_summed == CHECKSUM_NONE) {
int err = 0;
Al Viro
committed
__wsum csum = csum_and_copy_from_user(from,
page_address(page) + off,
copy, 0, &err);
if (err)
return err;
skb->csum = csum_block_add(skb->csum, csum, skb->len);
} else if (copy_from_user(page_address(page) + off, from, copy))
return -EFAULT;
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
sk->sk_wmem_queued += copy;
sk_mem_charge(sk, copy);
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
/**
* sk_wmem_alloc_get - returns write allocations
* @sk: socket
*
* Returns sk_wmem_alloc minus initial offset of one
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
return atomic_read(&sk->sk_wmem_alloc) - 1;
}
/**
* sk_rmem_alloc_get - returns read allocations
* @sk: socket
*
* Returns sk_rmem_alloc
*/
static inline int sk_rmem_alloc_get(const struct sock *sk)
{
return atomic_read(&sk->sk_rmem_alloc);
}
/**
* sk_has_allocations - check if allocations are outstanding
* @sk: socket
*
* Returns true if socket has write or read allocations
*/
static inline int sk_has_allocations(const struct sock *sk)
{
return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
/**
* sk_has_sleeper - check if there are any waiting processes
* @sk: socket
*
* Returns true if socket has waiting processes
*
* The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
*
* Consider following tcp code paths:
*
* CPU1 CPU2
*
* sys_select receive packet
* ... ...
* __add_wait_queue update tp->rcv_nxt
* ... ...
* tp->rcv_nxt check sock_def_readable
* ... {
* schedule ...
* if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
* wake_up_interruptible(sk->sk_sleep)
* ...
* }
*
* The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
* in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1
* could then endup calling schedule and sleep forever if there are no more
* data on the socket.
*
* The sk_has_sleeper is always called right after a call to read_lock, so we
* can use smp_mb__after_lock barrier.
*/
static inline int sk_has_sleeper(struct sock *sk)
{
/*
* We need to be sure we are in sync with the
* add_wait_queue modifications to the wait queue.
*
* This memory barrier is paired in the sock_poll_wait.
*/
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
}
/**
* sock_poll_wait - place memory barrier behind the poll_wait call.
* @filp: file
* @wait_address: socket wait queue
* @p: poll_table
*
* See the comments in the sk_has_sleeper function.
*/
static inline void sock_poll_wait(struct file *filp,
wait_queue_head_t *wait_address, poll_table *p)
{
if (p && wait_address) {
poll_wait(filp, wait_address, p);
/*
* We need to be sure we are in sync with the
* socket flags modification.
*
* This memory barrier is paired in the sk_has_sleeper.
*/
smp_mb();
}
}
/*
* Queue a received datagram if it will fit. Stream and sequenced
* protocols can't normally use this as they need to fit buffers in
* and play with them.
*
* Inlined as it's very short and called for pretty much every
* packet ever received.
*/
static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
/*
* We used to take a refcount on sk, but following operation
* is enough to guarantee sk_free() wont free this sock until
* all in-flight packets are completed
*/
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}
static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb->sk = sk;
skb->destructor = sock_rfree;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
}
extern void sk_reset_timer(struct sock *sk, struct timer_list* timer,
unsigned long expires);
extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf)
return -ENOMEM;
skb_set_owner_r(skb, sk);
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, skb->len);
return 0;
}
/*
* Recover an error report and clear atomically
*/
static inline int sock_error(struct sock *sk)
{
int err;
if (likely(!sk->sk_err))
return 0;
err = xchg(&sk->sk_err, 0);
return -err;
}
static inline unsigned long sock_wspace(struct sock *sk)
{
int amt = 0;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
if (amt < 0)
amt = 0;
}
return amt;
}
static inline void sk_wake_async(struct sock *sk, int how, int band)
{
sock_wake_async(sk->sk_socket, how, band);
}
#define SOCK_MIN_SNDBUF 2048
#define SOCK_MIN_RCVBUF 256
static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF);
}
}
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
static inline struct page *sk_stream_alloc_page(struct sock *sk)
{
struct page *page = NULL;
page = alloc_pages(sk->sk_allocation, 0);
if (!page) {
sk->sk_prot->enter_memory_pressure(sk);
sk_stream_moderate_sndbuf(sk);
}
return page;
}
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
static inline int sock_writeable(const struct sock *sk)
{
return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
}
static inline long sock_rcvtimeo(const struct sock *sk, int noblock)
{
return noblock ? 0 : sk->sk_rcvtimeo;
}
static inline long sock_sndtimeo(const struct sock *sk, int noblock)
{
return noblock ? 0 : sk->sk_sndtimeo;
}
static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
{
return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
}
/* Alas, with timeout socket operations are not restartable.
* Compare this to poll().
*/
static inline int sock_intr_errno(long timeo)
{
return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}
extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
static __inline__ void
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
/*
* generate control messages if
* - receive time stamping in software requested (SOCK_RCVTSTAMP
* or SOCK_TIMESTAMPING_RX_SOFTWARE)
* - software time stamp available and wanted
* (SOCK_TIMESTAMPING_SOFTWARE)
* - hardware time stamps available and wanted
* (SOCK_TIMESTAMPING_SYS_HARDWARE or
* SOCK_TIMESTAMPING_RAW_HARDWARE)
*/
if (sock_flag(sk, SOCK_RCVTSTAMP) ||
sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
(kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
(hwtstamps->hwtstamp.tv64 &&
sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) ||
(hwtstamps->syststamp.tv64 &&
sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb);
else
extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
/**
* sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @msg: outgoing packet
* @sk: socket sending this packet
* @shtx: filled with instructions for time stamping
*
* Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if
* parameters are invalid.
*/
extern int sock_tx_timestamp(struct msghdr *msg,
struct sock *sk,
union skb_shared_tx *shtx);
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
* @skb: socket buffer to eat
* @copied_early: flag indicating whether DMA operations copied this data early
*
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
#ifdef CONFIG_NET_DMA
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
{
__skb_unlink(skb, &sk->sk_receive_queue);
if (!copied_early)
__kfree_skb(skb);
else
__skb_queue_tail(&sk->sk_async_wait_queue, skb);
}
#else
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
{
__skb_unlink(skb, &sk->sk_receive_queue);
__kfree_skb(skb);
}
static inline
struct net *sock_net(const struct sock *sk)
{
#ifdef CONFIG_NET_NS
return sk->sk_net;
#else
return &init_net;
#endif
}
static inline
void sock_net_set(struct sock *sk, struct net *net)
{
#ifdef CONFIG_NET_NS
sk->sk_net = net;
#endif
}
Denis V. Lunev
committed
/*
* Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
* They should not hold a referrence to a namespace in order to allow
* to stop it.
* Sockets after sk_change_net should be released using sk_release_kernel
*/
static inline void sk_change_net(struct sock *sk, struct net *net)
{
put_net(sock_net(sk));
sock_net_set(sk, hold_net(net));
Denis V. Lunev
committed
}
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (unlikely(skb->sk)) {
struct sock *sk = skb->sk;
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
return NULL;
}
extern void sock_enable_timestamp(struct sock *sk, int flag);
extern int sock_get_timestamp(struct sock *, struct timeval __user *);
Eric Dumazet
committed
extern int sock_get_timestampns(struct sock *, struct timespec __user *);
extern int net_msg_warn;
#define NETDEBUG(fmt, args...) \
do { if (net_msg_warn) printk(fmt,##args); } while (0)
#define LIMIT_NETDEBUG(fmt, args...) \
do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;