Newer
Older
return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
}
* skwq_has_sleeper - check if there are any waiting processes
* Returns true if socket_wq has waiting processes
* The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
*
* Consider following tcp code paths::
* CPU1 CPU2
* sys_select receive packet
* ... ...
* __add_wait_queue update tp->rcv_nxt
* ... ...
* tp->rcv_nxt check sock_def_readable
* ... {
* schedule rcu_read_lock();
* wq = rcu_dereference(sk->sk_wq);
* if (wq && waitqueue_active(&wq->wait))
* wake_up_interruptible(&wq->wait)
* ...
* }
*
* The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
* in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1
* could then endup calling schedule and sleep forever if there are no more
* data on the socket.
static inline bool skwq_has_sleeper(struct socket_wq *wq)
return wq && wq_has_sleeper(&wq->wait);
}
/**
* sock_poll_wait - place memory barrier behind the poll_wait call.
* @filp: file
* @wait_address: socket wait queue
* @p: poll_table
*
* See the comments in the wq_has_sleeper function.
*/
static inline void sock_poll_wait(struct file *filp,
wait_queue_head_t *wait_address, poll_table *p)
{
if (!poll_does_not_wait(p) && wait_address) {
poll_wait(filp, wait_address, p);
/* We need to be sure we are in sync with the
* socket flags modification.
*
* This memory barrier is paired in the wq_has_sleeper.
smp_mb();
}
}
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
if (sk->sk_txhash) {
skb->l4_hash = 1;
skb->hash = sk->sk_txhash;
}
}
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk);
* Queue a received datagram if it will fit. Stream and sequenced
* protocols can't normally use this as they need to fit buffers in
* and play with them.
*
* Inlined as it's very short and called for pretty much every
* packet ever received.
*/
static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb->sk = sk;
skb->destructor = sock_rfree;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
unsigned long expires);
void sk_stop_timer(struct sock *sk, struct timer_list *timer);
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb));
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
/*
* Recover an error report and clear atomically
*/
int err;
if (likely(!sk->sk_err))
return 0;
err = xchg(&sk->sk_err, 0);
return -err;
}
static inline unsigned long sock_wspace(struct sock *sk)
{
int amt = 0;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
/* Note:
* We use sk->sk_wq_raw, from contexts knowing this
* pointer is not NULL and cannot disappear/change.
*/
static inline void sk_set_bit(int nr, struct sock *sk)
if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) &&
!sock_flag(sk, SOCK_FASYNC))
set_bit(nr, &sk->sk_wq_raw->flags);
}
static inline void sk_clear_bit(int nr, struct sock *sk)
{
if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) &&
!sock_flag(sk, SOCK_FASYNC))
clear_bit(nr, &sk->sk_wq_raw->flags);
static inline void sk_wake_async(const struct sock *sk, int how, int band)
if (sock_flag(sk, SOCK_FASYNC)) {
rcu_read_lock();
sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
rcu_read_unlock();
}
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
* need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak.
* Note: for send buffers, TCP works better if we can build two skbs at
* minimum.
#define TCP_SKB_MIN_TRUESIZE (2048 + SKB_DATA_ALIGN(sizeof(struct sk_buff)))
#define SOCK_MIN_SNDBUF (TCP_SKB_MIN_TRUESIZE * 2)
#define SOCK_MIN_RCVBUF TCP_SKB_MIN_TRUESIZE
static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
bool force_schedule);
/**
* sk_page_frag - return an appropriate page_frag
* @sk: socket
*
* If socket allocation mode allows current thread to sleep, it means its
* safe to use the per task page_frag instead of the per socket one.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
Mel Gorman
committed
if (gfpflags_allow_blocking(sk->sk_allocation))
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
int sg_start, int *sg_curr, unsigned int *sg_size,
int first_coalesce);
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
static inline bool sock_writeable(const struct sock *sk)
return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : sk->sk_rcvtimeo;
}
static inline long sock_sndtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : sk->sk_sndtimeo;
}
static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
{
return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
}
/* Alas, with timeout socket operations are not restartable.
* Compare this to poll().
*/
static inline int sock_intr_errno(long timeo)
{
return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}
struct sock_skb_cb {
u32 dropcount;
};
/* Store sock_skb_cb at the end of skb->cb[] so protocol families
* using skb->cb[] would keep using it directly and utilize its
* alignement guarantee.
*/
#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
sizeof(struct sock_skb_cb)))
#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
SOCK_SKB_CB_OFFSET))
Eyal Birger
committed
#define sock_skb_cb_check_size(size) \
BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
Eyal Birger
committed
static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
atomic_read(&sk->sk_drops) : 0;
static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
{
int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
atomic_add(segs, &sk->sk_drops);
}
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
/*
* generate control messages if
* - receive time stamping in software requested
* - software time stamp available and wanted
* - hardware time stamps available and wanted
*/
if (sock_flag(sk, SOCK_RCVTSTAMP) ||
(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
(kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
(hwtstamps->hwtstamp &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb);
else
if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
__sock_recv_wifi_status(msg, sk, skb);
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
__sock_recv_ts_and_drops(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
sk->sk_stamp = 0;
void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
/**
* sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @sk: socket sending this packet
* @tsflags: timestamping flags to use
* @tx_flags: completed with instructions for time stamping
*
* Note: callers should take care of initial ``*tx_flags`` value (usually 0)
static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags,
__u8 *tx_flags)
if (unlikely(tsflags))
__sock_tx_timestamp(tsflags, tx_flags);
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
}
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
* @skb: socket buffer to eat
*
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
__kfree_skb(skb);
}
static inline
struct net *sock_net(const struct sock *sk)
{
}
static inline
void sock_net_set(struct sock *sk, struct net *net)
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (skb->sk) {
struct sock *sk = skb->sk;
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
return NULL;
}
/* This helper checks if a socket is a full socket,
* ie _not_ a timewait or request socket.
*/
static inline bool sk_fullsock(const struct sock *sk)
{
return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
}
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
*/
static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
struct net_device *dev)
{
#ifdef CONFIG_SOCK_VALIDATE_XMIT
struct sock *sk = skb->sk;
if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
skb = sk->sk_validate_xmit_skb(sk, dev, skb);
#endif
return skb;
}
/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV
* SYNACK messages can be attached to either ones (depending on SYNCOOKIE)
*/
static inline bool sk_listener(const struct sock *sk)
{
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);
bool sk_ns_capable(const struct sock *sk,
struct user_namespace *user_ns, int cap);
bool sk_capable(const struct sock *sk, int cap);
bool sk_net_capable(const struct sock *sk, int cap);
void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
/* Take into consideration the size of the struct sk_buff overhead in the
* determination of these values, since that is non-constant across
* platforms. This makes socket queueing behavior and performance
* not depend upon such differences.
*/
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_tstamp_allow_data;
extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
return *proto->sysctl_wmem;
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
return *proto->sysctl_rmem;
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
* Some wifi drivers need to tweak it to get more chunks.
* They can use this helper from their ndo_start_xmit()
*/
static inline void sk_pacing_shift_update(struct sock *sk, int val)
{
if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
return;
sk->sk_pacing_shift = val;
}
/* if a socket is bound to a device, check that the given device
* index is either the same or that the socket is bound to an L3
* master device and the given device index is also enslaved to
* that L3 master
*/
static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
{
int mdif;
if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)
return true;
mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
if (mdif && mdif == sk->sk_bound_dev_if)
return true;
return false;
}