Newer
Older
break;
case SO_OOBINLINE:
sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
break;
case SO_NO_CHECK:
sk->sk_no_check_tx = valbool;
if ((val >= 0 && val <= 6) ||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
else
ret = -EPERM;
break;
case SO_LINGER:
if (optlen < sizeof(ling)) {
ret = -EINVAL; /* 1003.1g */
if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
}
if (!ling.l_onoff)
sock_reset_flag(sk, SOCK_LINGER);
else {
if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
#endif
sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
sock_set_flag(sk, SOCK_LINGER);
}
break;
case SO_BSDCOMPAT:
break;
case SO_PASSCRED:
if (valbool)
set_bit(SOCK_PASSCRED, &sock->flags);
else
clear_bit(SOCK_PASSCRED, &sock->flags);
break;
case SO_TIMESTAMP_OLD:
case SO_TIMESTAMPNS_OLD:
sock_set_timestamp(sk, valbool, optname);
case SO_TIMESTAMPING_OLD:
ret = sock_set_timestamping(sk, optname, val);
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
if (sock->ops->set_rcvlowat)
ret = sock->ops->set_rcvlowat(sk, val);
else
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
case SO_RCVTIMEO_OLD:
ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
optlen, optname == SO_RCVTIMEO_OLD);
case SO_SNDTIMEO_OLD:
ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
optlen, optname == SO_SNDTIMEO_OLD);
case SO_ATTACH_FILTER: {
struct sock_fprog fprog;
ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
ret = sk_attach_filter(&fprog, sk);
break;
case SO_ATTACH_BPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;
ret = -EFAULT;
if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_attach_bpf(ufd, sk);
}
break;
case SO_ATTACH_REUSEPORT_CBPF: {
struct sock_fprog fprog;
ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
ret = sk_reuseport_attach_filter(&fprog, sk);
break;
case SO_ATTACH_REUSEPORT_EBPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;
ret = -EFAULT;
if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_reuseport_attach_bpf(ufd, sk);
}
break;
case SO_DETACH_REUSEPORT_BPF:
ret = reuseport_detach_prog(sk);
break;
ret = sk_detach_filter(sk);
case SO_LOCK_FILTER:
if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
ret = -EPERM;
else
sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
break;
case SO_PASSSEC:
if (valbool)
set_bit(SOCK_PASSSEC, &sock->flags);
else
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
case SO_RXQ_OVFL:
sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
case SO_WIFI_STATUS:
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
break;
case SO_PEEK_OFF:
if (sock->ops->set_peek_off)
ret = sock->ops->set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
case SO_SELECT_ERR_QUEUE:
sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
break;
#ifdef CONFIG_NET_RX_BUSY_POLL
/* allow unprivileged users to decrease the value */
if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
ret = -EPERM;
else {
if (val < 0)
ret = -EINVAL;
else
sk->sk_ll_usec = val;
}
break;
case SO_PREFER_BUSY_POLL:
if (valbool && !capable(CAP_NET_ADMIN))
ret = -EPERM;
else
WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
break;
case SO_BUSY_POLL_BUDGET:
if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
ret = -EPERM;
} else {
if (val < 0 || val > U16_MAX)
ret = -EINVAL;
else
WRITE_ONCE(sk->sk_busy_poll_budget, val);
}
break;
unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
if (sizeof(ulval) != sizeof(val) &&
optlen >= sizeof(ulval) &&
copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
ret = -EFAULT;
break;
}
if (ulval != ~0UL)
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
sk->sk_max_pacing_rate = ulval;
sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
WRITE_ONCE(sk->sk_incoming_cpu, val);
case SO_CNX_ADVICE:
if (val == 1)
dst_negative_advice(sk);
break;
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
if (!((sk->sk_type == SOCK_STREAM &&
sk->sk_protocol == IPPROTO_TCP) ||
(sk->sk_type == SOCK_DGRAM &&
sk->sk_protocol == IPPROTO_UDP)))
ret = -ENOTSUPP;
} else if (sk->sk_family != PF_RDS) {
}
if (!ret) {
if (val < 0 || val > 1)
ret = -EINVAL;
else
sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
}
case SO_TXTIME:
if (optlen != sizeof(struct sock_txtime)) {
ret = -EINVAL;
} else if (copy_from_sockptr(&sk_txtime, optval,
sizeof(struct sock_txtime))) {
ret = -EFAULT;
} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
ret = -EINVAL;
break;
}
/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
* scheduler has enough safe guards.
*/
if (sk_txtime.clockid != CLOCK_MONOTONIC &&
!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
break;
sock_valbool_flag(sk, SOCK_TXTIME, true);
sk->sk_clockid = sk_txtime.clockid;
sk->sk_txtime_deadline_mode =
!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
sk->sk_txtime_report_errors =
!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
default:
ret = -ENOPROTOOPT;
break;
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
{
ucred->pid = pid_vnr(pid);
ucred->uid = ucred->gid = -1;
if (cred) {
struct user_namespace *current_ns = current_user_ns();
ucred->uid = from_kuid_munged(current_ns, cred->euid);
ucred->gid = from_kgid_munged(current_ns, cred->egid);
static int groups_to_user(gid_t __user *dst, const struct group_info *src)
{
struct user_namespace *user_ns = current_user_ns();
int i;
for (i = 0; i < src->ngroups; i++)
if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
return -EFAULT;
return 0;
}
int sock_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct sock *sk = sock->sk;
unsigned long ulval;
struct old_timeval32 tm32;
struct __kernel_old_timeval tm;
struct __kernel_sock_timeval stm;
struct sock_txtime txtime;
memset(&v, 0, sizeof(v));
case SO_DEBUG:
v.val = sock_flag(sk, SOCK_DBG);
break;
case SO_DONTROUTE:
v.val = sock_flag(sk, SOCK_LOCALROUTE);
break;
case SO_BROADCAST:
break;
case SO_SNDBUF:
v.val = sk->sk_sndbuf;
break;
case SO_RCVBUF:
v.val = sk->sk_rcvbuf;
break;
case SO_REUSEADDR:
v.val = sk->sk_reuse;
break;
case SO_REUSEPORT:
v.val = sk->sk_reuseport;
break;
break;
case SO_TYPE:
v.val = sk->sk_type;
break;
case SO_PROTOCOL:
v.val = sk->sk_protocol;
break;
case SO_DOMAIN:
v.val = sk->sk_family;
break;
case SO_ERROR:
v.val = -sock_error(sk);
v.val = xchg(&sk->sk_err_soft, 0);
break;
case SO_OOBINLINE:
v.val = sk->sk_no_check_tx;
break;
case SO_PRIORITY:
v.val = sk->sk_priority;
break;
case SO_LINGER:
lv = sizeof(v.ling);
v.ling.l_linger = sk->sk_lingertime / HZ;
break;
case SO_BSDCOMPAT:
break;
case SO_TIMESTAMP_OLD:
v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
!sock_flag(sk, SOCK_RCVTSTAMPNS);
break;
case SO_TIMESTAMPNS_OLD:
v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
break;
case SO_TIMESTAMP_NEW:
v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
break;
case SO_TIMESTAMPNS_NEW:
v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
case SO_TIMESTAMPING_OLD:
v.val = sk->sk_tsflags;
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
case SO_RCVLOWAT:
v.val = sk->sk_rcvlowat;
break;
v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
{
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
if (copy_to_user(optval, &peercred, len))
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
case SO_PEERGROUPS:
{
int ret, n;
if (!sk->sk_peer_cred)
return -ENODATA;
n = sk->sk_peer_cred->group_info->ngroups;
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
return put_user(len, optlen) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
ret = groups_to_user((gid_t __user *)optval,
sk->sk_peer_cred->group_info);
if (ret)
return ret;
goto lenout;
}
case SO_PEERNAME:
{
char address[128];
lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
if (copy_to_user(optval, address, len))
return -EFAULT;
goto lenout;
}
/* Dubious BSD thing... Probably nobody even uses it, but
* the UNIX standard wants it for whatever reason... -DaveM
*/
case SO_ACCEPTCONN:
v.val = sk->sk_state == TCP_LISTEN;
break;
v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
case SO_PEERSEC:
return security_socket_getpeersec_stream(sock, optval, optlen, len);
case SO_MARK:
v.val = sk->sk_mark;
break;
case SO_RXQ_OVFL:
case SO_PEEK_OFF:
if (!sock->ops->set_peek_off)
return -EOPNOTSUPP;
v.val = sk->sk_peek_off;
break;
return sock_getbindtodevice(sk, optval, optlen, len);
case SO_GET_FILTER:
len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
if (len < 0)
return len;
goto lenout;
case SO_LOCK_FILTER:
v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
break;
case SO_BPF_EXTENSIONS:
v.val = bpf_tell_extensions();
break;
case SO_SELECT_ERR_QUEUE:
v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
break;
#ifdef CONFIG_NET_RX_BUSY_POLL
v.val = sk->sk_ll_usec;
break;
case SO_PREFER_BUSY_POLL:
v.val = READ_ONCE(sk->sk_prefer_busy_poll);
break;
if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
lv = sizeof(v.ulval);
v.ulval = sk->sk_max_pacing_rate;
} else {
/* 32bit version */
v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
}
v.val = READ_ONCE(sk->sk_incoming_cpu);
case SO_MEMINFO:
{
u32 meminfo[SK_MEMINFO_VARS];
sk_get_meminfo(sk, meminfo);
len = min_t(unsigned int, len, sizeof(meminfo));
if (copy_to_user(optval, &meminfo, len))
return -EFAULT;
goto lenout;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_INCOMING_NAPI_ID:
v.val = READ_ONCE(sk->sk_napi_id);
/* aggregate non-NAPI IDs down to 0 */
if (v.val < MIN_NAPI_ID)
v.val = 0;
break;
#endif
case SO_COOKIE:
lv = sizeof(u64);
if (len < lv)
return -EINVAL;
v.val64 = sock_gen_cookie(sk);
break;
case SO_ZEROCOPY:
v.val = sock_flag(sk, SOCK_ZEROCOPY);
break;
case SO_TXTIME:
lv = sizeof(v.txtime);
v.txtime.clockid = sk->sk_clockid;
v.txtime.flags |= sk->sk_txtime_deadline_mode ?
SOF_TXTIME_DEADLINE_MODE : 0;
v.txtime.flags |= sk->sk_txtime_report_errors ?
SOF_TXTIME_REPORT_ERRORS : 0;
case SO_BINDTOIFINDEX:
v.val = sk->sk_bound_dev_if;
break;
case SO_NETNS_COOKIE:
lv = sizeof(u64);
if (len != lv)
return -EINVAL;
v.val64 = sock_net(sk)->net_cookie;
break;
YOSHIFUJI Hideaki/吉藤英明
committed
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
*/
if (len > lv)
len = lv;
if (copy_to_user(optval, &v, len))
return -EFAULT;
lenout:
if (put_user(len, optlen))
return -EFAULT;
return 0;
/*
* Initialize an sk_lock.
*
* (We also register the sk_lock with the lock validator.)
*/
static inline void sock_lock_init(struct sock *sk)
if (sk->sk_kern_sock)
sock_lock_init_class_and_name(
sk,
af_family_kern_slock_key_strings[sk->sk_family],
af_family_kern_slock_keys + sk->sk_family,
af_family_kern_key_strings[sk->sk_family],
af_family_kern_keys + sk->sk_family);
else
sock_lock_init_class_and_name(
sk,
af_family_slock_key_strings[sk->sk_family],
af_family_slock_keys + sk->sk_family,
af_family_key_strings[sk->sk_family],
af_family_keys + sk->sk_family);
/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
* even temporarly, because of RCU lookups. sk_node should also be left as is.
* We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
/* If we move sk_tx_queue_mapping out of the private section,
* we must check if sk_tx_queue_clear() is called after
* sock_copy() in sk_clone_lock().
*/
BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
offsetof(struct sock, sk_dontcopy_begin) ||
offsetof(struct sock, sk_tx_queue_mapping) >=
offsetof(struct sock, sk_dontcopy_end));
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
security_sk_clone(osk, nsk);
#endif
}
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
struct sock *sk;
struct kmem_cache *slab;
slab = prot->slab;
if (slab != NULL) {
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (!sk)
return sk;
Alexander Potapenko
committed
if (want_init_on_alloc(priority))
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
if (security_sk_alloc(sk, family, priority))
goto out_free;
if (!try_module_get(prot->owner))
goto out_free_sec;
}
out_free_sec:
security_sk_free(sk);
out_free:
if (slab != NULL)
kmem_cache_free(slab, sk);
else
kfree(sk);
return NULL;
}
static void sk_prot_free(struct proto *prot, struct sock *sk)
{
struct kmem_cache *slab;
struct module *owner;
owner = prot->owner;
slab = prot->slab;
mem_cgroup_sk_free(sk);
security_sk_free(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
else
kfree(sk);
module_put(owner);
/**
* sk_alloc - All socket objects are allocated here
* @net: the applicable net namespace
* @family: protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
* @prot: struct proto associated with this new sock instance
* @kern: is this to be a kernel socket?
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern)
struct sock *sk;
sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
sk->sk_family = family;
/*
* See comment in struct sock definition to understand
* why we need sk_prot_creator -acme
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sk->sk_kern_sock = kern;
sock_lock_init(sk);
Eric W. Biederman
committed
sk->sk_net_refcnt = kern ? 0 : 1;
if (likely(sk->sk_net_refcnt)) {
Eric W. Biederman
committed
get_net(net);
sock_inuse_add(net, 1);
}
Eric W. Biederman
committed
sock_net_set(sk, net);
refcount_set(&sk->sk_wmem_alloc, 1);
mem_cgroup_sk_alloc(sk);
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
sk_tx_queue_clear(sk);
/* Sockets having SOCK_RCU_FREE will call this function after one RCU
* grace period. This is the case for UDP sockets and TCP listeners.
*/
static void __sk_destruct(struct rcu_head *head)
struct sock *sk = container_of(head, struct sock, sk_rcu);
struct sk_filter *filter;
if (sk->sk_destruct)
sk->sk_destruct(sk);
filter = rcu_dereference_check(sk->sk_filter,
refcount_read(&sk->sk_wmem_alloc) == 0);
sk_filter_uncharge(sk, filter);
RCU_INIT_POINTER(sk->sk_filter, NULL);
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
#ifdef CONFIG_BPF_SYSCALL
bpf_sk_storage_free(sk);
#endif
pr_debug("%s: optmem leakage (%d bytes) detected\n",
__func__, atomic_read(&sk->sk_omem_alloc));
if (sk->sk_frag.page) {
put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
if (sk->sk_peer_cred)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
Eric W. Biederman
committed
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
if (rcu_access_pointer(sk->sk_reuseport_cb)) {
reuseport_detach_sock(sk);
use_call_rcu = true;
}
if (use_call_rcu)
call_rcu(&sk->sk_rcu, __sk_destruct);
else
__sk_destruct(&sk->sk_rcu);
}
static void __sk_free(struct sock *sk)
{
if (likely(sk->sk_net_refcnt))
sock_inuse_add(sock_net(sk), -1);
if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
sock_diag_broadcast_destroy(sk);
else
sk_destruct(sk);
}
void sk_free(struct sock *sk)
{
/*
* We subtract one from sk_wmem_alloc and can know if
* some packets are still in some tx queue.
* If not null, sock_wfree() will call __sk_free(sk) later
*/
if (refcount_dec_and_test(&sk->sk_wmem_alloc))
__sk_free(sk);
}
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
static void sk_init_common(struct sock *sk)
{
skb_queue_head_init(&sk->sk_receive_queue);
skb_queue_head_init(&sk->sk_write_queue);
skb_queue_head_init(&sk->sk_error_queue);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
af_rlock_keys + sk->sk_family,
af_family_rlock_key_strings[sk->sk_family]);
lockdep_set_class_and_name(&sk->sk_write_queue.lock,
af_wlock_keys + sk->sk_family,
af_family_wlock_key_strings[sk->sk_family]);
lockdep_set_class_and_name(&sk->sk_error_queue.lock,
af_elock_keys + sk->sk_family,
af_family_elock_key_strings[sk->sk_family]);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
af_family_clock_key_strings[sk->sk_family]);
}
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
*
* Caller must unlock socket even in error path (bh_unlock_sock(newsk))
*/
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
struct proto *prot = READ_ONCE(sk->sk_prot);
struct sk_filter *filter;
newsk = sk_prot_alloc(prot, priority, sk->sk_family);
if (!newsk)
goto out;
newsk->sk_prot_creator = prot;
/* SANITY */
if (likely(newsk->sk_net_refcnt))
get_net(sock_net(newsk));
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
newsk->sk_backlog.len = 0;
atomic_set(&newsk->sk_rmem_alloc, 0);
/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
refcount_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
sk_init_common(newsk);
newsk->sk_dst_cache = NULL;
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
/* sk->sk_memcg will be populated at accept() time */
newsk->sk_memcg = NULL;
cgroup_sk_clone(&newsk->sk_cgrp_data);
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
/* though it's an empty new sock, the charging may fail
* if sysctl_optmem_max was changed between creation of
* original socket and cloning
*/
is_charged = sk_filter_charge(newsk, filter);
RCU_INIT_POINTER(newsk->sk_filter, filter);
rcu_read_unlock();
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* We need to make sure that we don't uncharge the new
* socket if we couldn't charge it in the first place
* as otherwise we uncharge the parent's filter.
if (!is_charged)
RCU_INIT_POINTER(newsk->sk_filter, NULL);
sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
if (bpf_sk_storage_clone(sk, newsk)) {
sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
/* Clear sk_user_data if parent had the pointer tagged
* as not suitable for copying when cloning.