linux-imx/include/linux/mroute.h
Nikolay Aleksandrov ec097a17bf ipmr, ip6mr: fix scheduling while atomic and a deadlock with ipmr_get_route
commit 2cf750704b upstream.

Since the commit below the ipmr/ip6mr rtnl_unicast() code uses the portid
instead of the previous dst_pid which was copied from in_skb's portid.
Since the skb is new the portid is 0 at that point so the packets are sent
to the kernel and we get scheduling while atomic or a deadlock (depending
on where it happens) by trying to acquire rtnl two times.
Also since this is RTM_GETROUTE, it can be triggered by a normal user.

Here's the sleeping while atomic trace:
[ 7858.212557] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
[ 7858.212748] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0
[ 7858.212881] 2 locks held by swapper/0/0:
[ 7858.213013]  #0:  (((&mrt->ipmr_expire_timer))){+.-...}, at: [<ffffffff810fbbf5>] call_timer_fn+0x5/0x350
[ 7858.213422]  #1:  (mfc_unres_lock){+.....}, at: [<ffffffff8161e005>] ipmr_expire_process+0x25/0x130
[ 7858.213807] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc7+ #179
[ 7858.213934] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[ 7858.214108]  0000000000000000 ffff88005b403c50 ffffffff813a7804 0000000000000000
[ 7858.214412]  ffffffff81a1338e ffff88005b403c78 ffffffff810a4a72 ffffffff81a1338e
[ 7858.214716]  000000000000026c 0000000000000000 ffff88005b403ca8 ffffffff810a4b9f
[ 7858.215251] Call Trace:
[ 7858.215412]  <IRQ>  [<ffffffff813a7804>] dump_stack+0x85/0xc1
[ 7858.215662]  [<ffffffff810a4a72>] ___might_sleep+0x192/0x250
[ 7858.215868]  [<ffffffff810a4b9f>] __might_sleep+0x6f/0x100
[ 7858.216072]  [<ffffffff8165bea3>] mutex_lock_nested+0x33/0x4d0
[ 7858.216279]  [<ffffffff815a7a5f>] ? netlink_lookup+0x25f/0x460
[ 7858.216487]  [<ffffffff8157474b>] rtnetlink_rcv+0x1b/0x40
[ 7858.216687]  [<ffffffff815a9a0c>] netlink_unicast+0x19c/0x260
[ 7858.216900]  [<ffffffff81573c70>] rtnl_unicast+0x20/0x30
[ 7858.217128]  [<ffffffff8161cd39>] ipmr_destroy_unres+0xa9/0xf0
[ 7858.217351]  [<ffffffff8161e06f>] ipmr_expire_process+0x8f/0x130
[ 7858.217581]  [<ffffffff8161dfe0>] ? ipmr_net_init+0x180/0x180
[ 7858.217785]  [<ffffffff8161dfe0>] ? ipmr_net_init+0x180/0x180
[ 7858.217990]  [<ffffffff810fbc95>] call_timer_fn+0xa5/0x350
[ 7858.218192]  [<ffffffff810fbbf5>] ? call_timer_fn+0x5/0x350
[ 7858.218415]  [<ffffffff8161dfe0>] ? ipmr_net_init+0x180/0x180
[ 7858.218656]  [<ffffffff810fde10>] run_timer_softirq+0x260/0x640
[ 7858.218865]  [<ffffffff8166379b>] ? __do_softirq+0xbb/0x54f
[ 7858.219068]  [<ffffffff816637c8>] __do_softirq+0xe8/0x54f
[ 7858.219269]  [<ffffffff8107a948>] irq_exit+0xb8/0xc0
[ 7858.219463]  [<ffffffff81663452>] smp_apic_timer_interrupt+0x42/0x50
[ 7858.219678]  [<ffffffff816625bc>] apic_timer_interrupt+0x8c/0xa0
[ 7858.219897]  <EOI>  [<ffffffff81055f16>] ? native_safe_halt+0x6/0x10
[ 7858.220165]  [<ffffffff810d64dd>] ? trace_hardirqs_on+0xd/0x10
[ 7858.220373]  [<ffffffff810298e3>] default_idle+0x23/0x190
[ 7858.220574]  [<ffffffff8102a20f>] arch_cpu_idle+0xf/0x20
[ 7858.220790]  [<ffffffff810c9f8c>] default_idle_call+0x4c/0x60
[ 7858.221016]  [<ffffffff810ca33b>] cpu_startup_entry+0x39b/0x4d0
[ 7858.221257]  [<ffffffff8164f995>] rest_init+0x135/0x140
[ 7858.221469]  [<ffffffff81f83014>] start_kernel+0x50e/0x51b
[ 7858.221670]  [<ffffffff81f82120>] ? early_idt_handler_array+0x120/0x120
[ 7858.221894]  [<ffffffff81f8243f>] x86_64_start_reservations+0x2a/0x2c
[ 7858.222113]  [<ffffffff81f8257c>] x86_64_start_kernel+0x13b/0x14a

Fixes: 2942e90050 ("[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[bwh: Backported to 3.2:
 - Use 'pid' instead of 'portid' where necessary
 - Adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
2016-11-20 01:01:42 +00:00

252 lines
6.5 KiB
C

#ifndef __LINUX_MROUTE_H
#define __LINUX_MROUTE_H
#include <linux/sockios.h>
#include <linux/types.h>
#ifdef __KERNEL__
#include <linux/in.h>
#endif
/*
* Based on the MROUTING 3.5 defines primarily to keep
* source compatibility with BSD.
*
* See the mrouted code for the original history.
*
* Protocol Independent Multicast (PIM) data structures included
* Carlos Picoto (cap@di.fc.ul.pt)
*
*/
#define MRT_BASE 200
#define MRT_INIT (MRT_BASE) /* Activate the kernel mroute code */
#define MRT_DONE (MRT_BASE+1) /* Shutdown the kernel mroute */
#define MRT_ADD_VIF (MRT_BASE+2) /* Add a virtual interface */
#define MRT_DEL_VIF (MRT_BASE+3) /* Delete a virtual interface */
#define MRT_ADD_MFC (MRT_BASE+4) /* Add a multicast forwarding entry */
#define MRT_DEL_MFC (MRT_BASE+5) /* Delete a multicast forwarding entry */
#define MRT_VERSION (MRT_BASE+6) /* Get the kernel multicast version */
#define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */
#define MRT_PIM (MRT_BASE+8) /* enable PIM code */
#define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */
#define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */
#define SIOCGETSGCNT (SIOCPROTOPRIVATE+1)
#define SIOCGETRPF (SIOCPROTOPRIVATE+2)
#define MAXVIFS 32
typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */
typedef unsigned short vifi_t;
#define ALL_VIFS ((vifi_t)(-1))
/*
* Same idea as select
*/
#define VIFM_SET(n,m) ((m)|=(1<<(n)))
#define VIFM_CLR(n,m) ((m)&=~(1<<(n)))
#define VIFM_ISSET(n,m) ((m)&(1<<(n)))
#define VIFM_CLRALL(m) ((m)=0)
#define VIFM_COPY(mfrom,mto) ((mto)=(mfrom))
#define VIFM_SAME(m1,m2) ((m1)==(m2))
/*
* Passed by mrouted for an MRT_ADD_VIF - again we use the
* mrouted 3.6 structures for compatibility
*/
struct vifctl {
vifi_t vifc_vifi; /* Index of VIF */
unsigned char vifc_flags; /* VIFF_ flags */
unsigned char vifc_threshold; /* ttl limit */
unsigned int vifc_rate_limit; /* Rate limiter values (NI) */
union {
struct in_addr vifc_lcl_addr; /* Local interface address */
int vifc_lcl_ifindex; /* Local interface index */
};
struct in_addr vifc_rmt_addr; /* IPIP tunnel addr */
};
#define VIFF_TUNNEL 0x1 /* IPIP tunnel */
#define VIFF_SRCRT 0x2 /* NI */
#define VIFF_REGISTER 0x4 /* register vif */
#define VIFF_USE_IFINDEX 0x8 /* use vifc_lcl_ifindex instead of
vifc_lcl_addr to find an interface */
/*
* Cache manipulation structures for mrouted and PIMd
*/
struct mfcctl {
struct in_addr mfcc_origin; /* Origin of mcast */
struct in_addr mfcc_mcastgrp; /* Group in question */
vifi_t mfcc_parent; /* Where it arrived */
unsigned char mfcc_ttls[MAXVIFS]; /* Where it is going */
unsigned int mfcc_pkt_cnt; /* pkt count for src-grp */
unsigned int mfcc_byte_cnt;
unsigned int mfcc_wrong_if;
int mfcc_expire;
};
/*
* Group count retrieval for mrouted
*/
struct sioc_sg_req {
struct in_addr src;
struct in_addr grp;
unsigned long pktcnt;
unsigned long bytecnt;
unsigned long wrong_if;
};
/*
* To get vif packet counts
*/
struct sioc_vif_req {
vifi_t vifi; /* Which iface */
unsigned long icount; /* In packets */
unsigned long ocount; /* Out packets */
unsigned long ibytes; /* In bytes */
unsigned long obytes; /* Out bytes */
};
/*
* This is the format the mroute daemon expects to see IGMP control
* data. Magically happens to be like an IP packet as per the original
*/
struct igmpmsg {
__u32 unused1,unused2;
unsigned char im_msgtype; /* What is this */
unsigned char im_mbz; /* Must be zero */
unsigned char im_vif; /* Interface (this ought to be a vifi_t!) */
unsigned char unused3;
struct in_addr im_src,im_dst;
};
/*
* That's all usermode folks
*/
#ifdef __KERNEL__
#include <linux/pim.h>
#include <net/sock.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
{
return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10);
}
#else
static inline int ip_mroute_opt(int opt)
{
return 0;
}
#endif
#ifdef CONFIG_IP_MROUTE
extern int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int);
extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
extern int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
extern int ip_mr_init(void);
#else
static inline
int ip_mroute_setsockopt(struct sock *sock,
int optname, char __user *optval, unsigned int optlen)
{
return -ENOPROTOOPT;
}
static inline
int ip_mroute_getsockopt(struct sock *sock,
int optname, char __user *optval, int __user *optlen)
{
return -ENOPROTOOPT;
}
static inline
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
return -ENOIOCTLCMD;
}
static inline int ip_mr_init(void)
{
return 0;
}
#endif
struct vif_device {
struct net_device *dev; /* Device we are using */
unsigned long bytes_in,bytes_out;
unsigned long pkt_in,pkt_out; /* Statistics */
unsigned long rate_limit; /* Traffic shaping (NI) */
unsigned char threshold; /* TTL threshold */
unsigned short flags; /* Control flags */
__be32 local,remote; /* Addresses(remote for tunnels)*/
int link; /* Physical interface index */
};
#define VIFF_STATIC 0x8000
struct mfc_cache {
struct list_head list;
__be32 mfc_mcastgrp; /* Group the entry belongs to */
__be32 mfc_origin; /* Source of packet */
vifi_t mfc_parent; /* Source interface */
int mfc_flags; /* Flags on line */
union {
struct {
unsigned long expires;
struct sk_buff_head unresolved; /* Unresolved buffers */
} unres;
struct {
unsigned long last_assert;
int minvif;
int maxvif;
unsigned long bytes;
unsigned long pkt;
unsigned long wrong_if;
unsigned char ttls[MAXVIFS]; /* TTL thresholds */
} res;
} mfc_un;
struct rcu_head rcu;
};
#define MFC_STATIC 1
#define MFC_NOTIFY 2
#define MFC_LINES 64
#ifdef __BIG_ENDIAN
#define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1))
#else
#define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1))
#endif
#endif
#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */
/*
* Pseudo messages used by mrouted
*/
#define IGMPMSG_NOCACHE 1 /* Kern cache fill request to mrouted */
#define IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */
#define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */
#ifdef __KERNEL__
struct rtmsg;
extern int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, int nowait, u32 portid);
#endif
#endif