当前位置:网站首页>Analysis of tcpdump packet capturing kernel code
Analysis of tcpdump packet capturing kernel code
2022-06-24 21:24:00 【already_ skb】
Registering the PF_PACKET protocol family
The .create callback is invoked when a PF_PACKET socket is created, and the packet-capture hook is registered during that call; see the implementation of packet_create.
static const struct net_proto_familypacket_family_ops = {
.family= PF_PACKET,
.create= packet_create,
.owner = THIS_MODULE,
};
/*
 * Module init for the packet socket code (excerpt).
 *
 * Only the call relevant to the article is shown: registering
 * packet_family_ops, whose .create callback is packet_create().
 * The rest of the body is elided in this excerpt.
 */
static int __init packet_init(void)
{
	/* ... code elided in this excerpt ... */
	sock_register(&packet_family_ops);
	/* ... code elided in this excerpt (presumably including the return) ... */
}
Creating a SOCK_PACKET socket: registering the callback function
/*
* Create a packet of type SOCK_PACKET.
*/
static int packet_create(struct net *net,struct socket *sock, int protocol,
int kern)
{
structsock *sk;
structpacket_sock *po;
__be16proto = (__force __be16)protocol; /* weird, but documented */
interr;
if(!ns_capable(net->user_ns, CAP_NET_RAW))
return-EPERM;
if(sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
sock->type != SOCK_PACKET)
return-ESOCKTNOSUPPORT;
sock->state= SS_UNCONNECTED;
err= -ENOBUFS;
sk= sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
if(sk == NULL)
gotoout;
sock->ops= &packet_ops;
if(sock->type == SOCK_PACKET)
sock->ops= &packet_ops_spkt;
sock_init_data(sock,sk);
po= pkt_sk(sk);
sk->sk_family= PF_PACKET;
po->num= proto;
err= packet_alloc_pending(po);
if(err)
gotoout2;
packet_cached_dev_reset(po);
sk->sk_destruct= packet_sock_destruct;
sk_refcnt_debug_inc(sk);
/*
* Attacha protocol block
*/
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
po->prot_hook.func= packet_rcv;
// Register the handler
if (sock->type == SOCK_PACKET)
po->prot_hook.func =packet_rcv_spkt;
po->prot_hook.af_packet_priv= sk;
if (proto) {
po->prot_hook.type =proto;
Put this socket Mount to ptype_all On the list
register_prot_hook(sk);
}
mutex_lock(&net->packet.sklist_lock);
sk_add_node_rcu(sk,&net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
sock_prot_inuse_add(net,&packet_proto, 1);
preempt_enable();
return0;
out2:
sk_free(sk);
out:
returnerr;
}
Receive-direction kernel packet-capture function
There are two invocation scenarios: one is when the network card has NAPI enabled, in which case the function is called from process_backlog during polling; the other is the non-NAPI scenario, where netif_receive_skb receives the packet directly and submits it to the network layer.
/*
 * Core of the receive path for a single skb.
 *
 * The packet is offered, in order, to: every tap on ptype_all (skipped
 * when @pfmemalloc is set), the ingress classifier (CONFIG_NET_CLS_ACT),
 * VLAN receive processing, the device's rx_handler, and finally the
 * protocol handlers in ptype_base that match skb->protocol.
 *
 * Delivery is deferred by one match: each matching handler is remembered
 * in pt_prev and only delivered via deliver_skb() once the next match (or
 * the next processing stage) is reached.  The last match is called
 * directly with pt_prev->func().
 *
 * Returns the result of the last handler invoked, NET_RX_SUCCESS if the
 * rx_handler consumed the packet, or NET_RX_DROP if nothing took it.
 */
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	net_timestamp_check(!netdev_tstamp_prequeue, skb);

	trace_netif_receive_skb(skb);

	orig_dev = skb->dev;

	skb_reset_network_header(skb);
	if (!skb_transport_header_was_set(skb))
		skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

another_round:
	skb->skb_iif = skb->dev->ifindex;

	__this_cpu_inc(softnet_data.processed);

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	if (pfmemalloc)
		goto skip_taps;

	/*
	 * Walk the taps on ptype_all (per the article, this is where the
	 * hook attached when tcpdump created its socket is found).
	 */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			/* Hand the packet to the previously matched tap. */
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
		goto drop;

	if (skb_vlan_tag_present(skb)) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		if (vlan_do_receive(&skb))
			goto another_round;
		else if (unlikely(!skb))
			goto out;
	}

	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			ret = NET_RX_SUCCESS;
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* fall through */
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}

	if (unlikely(skb_vlan_tag_present(skb))) {
		if (skb_vlan_tag_get_id(skb))
			skb->pkt_type = PACKET_OTHERHOST;
		/* Note: we might in the future use prio bits
		 * and set skb->priority like in vlan_do_receive()
		 * For the time being, just ignore Priority Code Point
		 */
		skb->vlan_tci = 0;
	}

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;
	/*
	 * Regular protocol processing: handlers registered for this packet
	 * type (per the article, ip_rcv when the packet is IP).
	 */
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
			goto drop;
		else
			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
drop:
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	return ret;
}
Send-direction kernel packet-capture function
There are also two branches for data transmission. In one, dev_queue_xmit submits the data directly to the network card (no qdisc configured). In the other, a qdisc is configured: dev_queue_xmit checks whether a queue is configured and, if so, calls __dev_xmit_skb to put the data on the qdisc queue; the data then waits to be sent when the transmit soft-interrupt handler net_tx_action is invoked, which triggers the copy to the capture hook.
/*
* Support routine. Sends outgoing frames toany network
* taps currently in use.
*/
static void dev_queue_xmit_nit(structsk_buff *skb, struct net_device *dev)
{
structpacket_type *ptype;
structsk_buff *skb2 = NULL;
structpacket_type *pt_prev = NULL;
rcu_read_lock();
// Traverse tcpdumpsocket Hook attached when creating
list_for_each_entry_rcu(ptype,&ptype_all, list) {
/* Never send packets back tothe socket
* they originated from - MvS([email protected])
*/
if ((ptype->dev == dev ||!ptype->dev) &&
(!skb_loop_sk(ptype, skb))) {
if (pt_prev) {
// Copy data message
deliver_skb(skb2,pt_prev, skb->dev);
pt_prev =ptype;
continue;
}
skb2 =skb_clone(skb, GFP_ATOMIC);
if (!skb2)
break;
net_timestamp_set(skb2);
/* skb->nh shouldbe correctly
set by sender, so that the second statementis
just protection against buggy protocols.
*/
skb_reset_mac_header(skb2);
if(skb_network_header(skb2) < skb2->data ||
skb_network_header(skb2) >skb_tail_pointer(skb2)) {
net_crit_ratelimited("protocol%04x is buggy, dev %s\n",
ntohs(skb2->protocol),
dev->name);
skb_reset_network_header(skb2);
}
skb2->transport_header= skb2->network_header;
skb2->pkt_type =PACKET_OUTGOING;
pt_prev = ptype;
}
}
if(pt_prev)
pt_prev->func(skb2,skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
}
Destroying a SOCK_PACKET socket: unregistering the callback
When a SOCK_PACKET-type socket is closed, its release function is called, and the previously registered hook function is removed at that point.
static int packet_release(struct socket*sock)
{
structsock *sk = sock->sk;
structpacket_sock *po;
structnet *net;
uniontpacket_req_u req_u;
if(!sk)
return0;
net= sock_net(sk);
po= pkt_sk(sk);
mutex_lock(&net->packet.sklist_lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
sock_prot_inuse_add(net,sk->sk_prot, -1);
preempt_enable();
spin_lock(&po->bind_lock);
// from ptype_all Remove the registered hook function from the function
unregister_prot_hook(sk, false);
packet_cached_dev_reset(po);
if(po->prot_hook.dev) {
dev_put(po->prot_hook.dev);
po->prot_hook.dev= NULL;
}
spin_unlock(&po->bind_lock);
packet_flush_mclist(sk);
if(po->rx_ring.pg_vec) {
memset(&req_u,0, sizeof(req_u));
packet_set_ring(sk,&req_u, 1, 0);
}
if(po->tx_ring.pg_vec) {
memset(&req_u,0, sizeof(req_u));
packet_set_ring(sk,&req_u, 1, 1);
}
fanout_release(sk);
synchronize_net();
/*
* Nowthe socket is dead. No more input will appear.
*/
sock_orphan(sk);
sock->sk= NULL;
/*Purge queues */
skb_queue_purge(&sk->sk_receive_queue);
packet_free_pending(po);
sk_refcnt_debug_release(sk);
sock_put(sk);
return0;
}
summary
When capturing packets, tcpdump creates a SOCK_PACKET-type socket. During socket creation, the create callback of packet_family_ops (packet_create) is called, which registers the packet-capture hook function on the ptype_all linked list. In the receive direction, __netif_receive_skb_core calls the registered hook function to copy the data packet to the corresponding handler in af_packet.c; likewise, the send-side function dev_queue_xmit_nit calls the hook function to copy the data packet.
Jensonqiu[email protected] 2018/05/08
边栏推荐
- [cloud native learning notes] kubernetes Foundation
- Kernel Debugging Tricks
- Football information query system based on C language course report + project source code + demo ppt+ project screenshot
- Station B takes goods to learn from New Oriental
- Page replacement of virtual memory paging mechanism
- memcached全面剖析–5. memcached的应用和兼容程序
- Common data model (updating)
- Network security review office starts network security review on HowNet
- Microsoft Certification (dynamic 365) test
- Concepts of kubernetes components
猜你喜欢

Several common command operations in win system

Postman assertion

Learn to use a new technology quickly

The difference between RPC and restful

Pod lifecycle in kubernetes

Record a deletion bash_ Profile file

Create a multithreaded thread class

ping: www.baidu.com: 未知的名称或服务

Static routing job supplement

CondaValueError: The target prefix is the base prefix. Aborting.
随机推荐
Web automation: web control interaction / multi window processing / Web page frame
Summary of message protocol problems
Background of master data construction
浅谈MySql update会锁定哪些范围的数据
Open function
JMeter parameterization
Smooth live broadcast | analysis of key technologies for live broadcast pain points
Rewrite, maplocal and maplocal operations of Charles
Second understanding permutation and combination
JMeter implementation specifies concurrent loop testing
Adding subscribers to a list using mailchimp's API V3
[cloud native learning notes] learn about kubernetes configuration list yaml file
JMeter installation plug-in, adding [email protected] -Perfmon metric collector listener steps
Rename and delete files
Geek University cloud native training camp
Rip/ospf protocol notes sorting
Return of missing persons
Why do we always "give up halfway"?
Record a deletion bash_ Profile file
Php-pdo parameter binding problem