Linux内核:从上向下分析网络层IP协议

Sally ·
更新时间:2024-11-14
· 598 次阅读

  上篇博文分析到,传输层最终会调用ip_queue_xmit()函数,将发送数据的任务交给网络层,下面分析一下该函数:

  该函数的主要函数调用关系图如下:

/*  * Queues a packet to be sent, and starts the transmitter  * if necessary.  if free = 1 then we free the block after  * transmit, otherwise we don't. If free==2 we not only  * free the block but also don't assign a new ip seq number.  * This routine also needs to put in the total length,  * and compute the checksum  */

void ip_queue_xmit(struct sock *sk, //发送数据的队列所对应的sock结构        struct device *dev,//发送该数据包的网卡设备             struct sk_buff *skb,//封装好的sk_buff结构,要发送的数据在该结构中             int free)//主要配合TCP协议使用,用于数据包的重发,UDP等协议调用是free=1 {  struct iphdr *iph;//IP数据报首部指针  unsigned char *ptr;

 /* Sanity check */  if (dev == NULL)  {   printk("IP: ip_queue_xmit dev = NULL ");   return;  }

 IS_SKB(skb);

 /*   * Do some book-keeping in the packet for later   */

 skb->dev = dev;//进一步完整sk_buff的相应字段  skb->when = jiffies;//用于TCP协议的超时重传

 /*   * Find the IP header and set the length. This is bad   * but once we get the skb data handling code in the   * hardware will push its header sensibly and we will   * set skb->ip_hdr to avoid this mess and the fixed   * header length problem   */

 ptr = skb->data;//指针指向sk_buff中的数据部分  ptr += dev->hard_header_len;//hard_header_len为硬件首部长度,在net_init.c的函数eth_setup()函数中设置的,dev->hard_header_len = ETH_HLEN; 以太网首部长度为14  iph = (struct iphdr *)ptr;//prt已经指向IP数据包的首部  skb->ip_hdr = iph;  iph->tot_len = ntohs(skb->len-dev->hard_header_len);//计算IP数据报的总长度

#ifdef CONFIG_IP_FIREWALL  if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) != 1)   /* just don't send this packet */   return; #endif

 /*   * No reassigning numbers to fragments...   */

 if(free!=2)   iph->id      = htons(ip_id_count++);  else   free=1;

 /* All buffers without an owner socket get freed */  if (sk == NULL)   free = 1;

 skb->free = free;//设置skb的free值,free=1,发送后立即释放;free=2,不但释放缓存,而且不分配新的序列号

 /*   * Do we need to fragment. Again this is inefficient.   * We need to somehow lock the original buffer and use   * bits of it.   */  //数据帧中的数据部分必须小于等于MTU  if(skb->len > dev->mtu + dev->hard_header_len)//发送的数据长度大于数据帧的数据部分和帧首部之和,则需要分片  {   ip_fragment(sk,skb,dev,0);//对数据报分片后继续调用ip _queue_xmit()函数发送数据   IS_SKB(skb);   kfree_skb(skb,FREE_WRITE);   return;  }

 /*   * Add an IP checksum   */

 ip_send_check(iph);//IP数据报首部检查

 /*   * Print the frame when debugging   */

 /*   * More debugging. You cannot queue a packet already on a list   * Spot this and moan loudly.   */  if (skb->next != NULL)//说明该数据包仍然存在于某个缓存队列  {   printk("ip_queue_xmit: next != NULL ");   skb_unlink(skb);//将其从缓存链表中删除,否则可能导致内核错误  }

 /*   * If a sender wishes the packet to remain unfreed   * we add it to his send queue. This arguably belongs   * in the TCP level since nobody else uses it. BUT   * remember IPng might change all the rules.   */

 if (!free)//free=0  {   unsigned long flags;   /* The socket now has more outstanding blocks */

  sk->packets_out++;

  /* Protect the list for a moment */   save_flags(flags);   cli();

  if (skb->link3 != NULL)//link3指向数据报道呃重发队列   {    printk("ip.c: link3 != NULL ");    skb->link3 = NULL;   }   //sk中send_tail和send_head是用户缓存的单向链表表尾和表头   if (sk->send_head == NULL)   {    sk->send_tail = skb;    sk->send_head = skb;   }   else   {    sk->send_tail->link3 = skb;//link3指针用于数据包的连接    sk->send_tail = skb;   }   /* skb->link3 is NULL */

  /* Interrupt restore */   restore_flags(flags);  }  else   /* Remember who owns the buffer */   skb->sk = sk;

 /*   * If the indicated interface is up and running, send the packet.   */    ip_statistics.IpOutRequests++; #ifdef CONFIG_IP_ACCT  ip_acct_cnt(iph,dev, ip_acct_chain); #endif   #ifdef CONFIG_IP_MULTICAST //这部分是IP数据报的多播处理

 /*   * Multicasts are looped back for other local users   */    ....................................... #endif  if((dev->flags&IFF_BROADCAST) && iph->daddr==dev->pa_brdaddr && !(dev->flags&IFF_LOOPBACK))//广播数据包的处理   ip_loopback(dev,skb);    if (dev->flags & IFF_UP)//设备状态正常  {   /*    * If we have an owner use its priority setting,    * otherwise use NORMAL    */   //调用设备接口层函数发送数据: dev_queue_xmit()函数   if (sk != NULL)   {    dev_queue_xmit(skb, dev, sk->priority);   }   else   {    dev_queue_xmit(skb, dev, SOPRI_NORMAL);   }  }  else//设备状态不正常  {   ip_statistics.IpOutDiscards++;   if (free)    kfree_skb(skb, FREE_WRITE);  } }

  这个函数中调用了ip_fragment()函数,对长度过长的数据包进行分片。这里没有对该函数做详细分析,其代码如下:

void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag) {  struct iphdr *iph;  unsigned char *raw;  unsigned char *ptr;  struct sk_buff *skb2;  int left, mtu, hlen, len;  int offset;  unsigned long flags;

 /*   * Point into the IP datagram header.   */

 raw = skb->data;  iph = (struct iphdr *) (raw + dev->hard_header_len);

 skb->ip_hdr = iph;

 /*   * Setup starting values.   */

 hlen = (iph->ihl * sizeof(unsigned long));  left = ntohs(iph->tot_len) - hlen; /* Space per frame */  hlen += dev->hard_header_len;  /* Total header size */  mtu = (dev->mtu - hlen);  /* Size of data space */  ptr = (raw + hlen);   /* Where to start from */

 /*   * Check for any "DF" flag. [DF means do not fragment]   */

 if (ntohs(iph->frag_off) & IP_DF)  {   /*    * Reply giving the MTU of the failed hop.    */   ip_statistics.IpFragFails++;   icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev->mtu, dev);   return;  }

 /*   * The protocol doesn't seem to say what to do in the case that the   * frame + options doesn't fit the mtu. As it used to fall down dead   * in this case we were fortunate it didn't happen   */

 if(mtu<8)  {   /* It's wrong but it's better than nothing */   icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);   ip_statistics.IpFragFails++;   return;  }

 /*   * Fragment the datagram.   */

 /*   * The initial offset is 0 for a complete frame. When   * fragmenting fragments it's wherever this one starts.   */

 if (is_frag & 2)   offset = (ntohs(iph->frag_off) & 0x1fff) << 3;  else   offset = 0;

 /*   * Keep copying data until we run out.   */

 while(left > 0)  {   len = left;   /* IF: it doesn't fit, use 'mtu' - the data space left */   if (len > mtu)    len = mtu;   /* IF: we are not sending upto and including the packet end      then align the next start on an eight byte boundary */   if (len < left)   {    len/=8;    len*=8;   }   /*    * Allocate buffer.    */

  if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)   {    printk("IP: frag: no memory for new fragment! ");    ip_statistics.IpFragFails++;    return;   }

  /*    * Set up data on packet    */

  skb2->arp = skb->arp;   if(skb->free==0)    printk("IP fragmenter: BUG free!=1 in fragmenter ");   skb2->free = 1;   skb2->len = len + hlen;   skb2->h.raw=(char *) skb2->data;   /*    * Charge the memory for the fragment to any owner    * it might possess    */

  save_flags(flags);   if (sk)   {    cli();    sk->wmem_alloc += skb2->mem_len;    skb2->sk=sk;   }   restore_flags(flags);   skb2->raddr = skb->raddr; /* For rebuild_header - must be here */

  /*    * Copy the packet header into the new buffer.    */

  memcpy(skb2->h.raw, raw, hlen);

  /*    * Copy a block of the IP datagram.    */   memcpy(skb2->h.raw + hlen, ptr, len);   left -= len;

  skb2->h.raw+=dev->hard_header_len;

  /*    * Fill in the new header fields.    */   iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);   iph->frag_off = htons((offset >> 3));   /*    * Added AC : If we are fragmenting a fragment thats not the    *     last fragment then keep MF on each bit    */   if (left > 0 || (is_frag & 1))    iph->frag_off |= htons(IP_MF);   ptr += len;   offset += len;

  /*    * Put this fragment into the sending queue.    */

  ip_statistics.IpFragCreates++;

  ip_queue_xmit(sk, dev, skb2, 2);//还是调用ip_queue_xmit()函数来发送分片后的数据  }  ip_statistics.IpFragOKs++; }

  网络层的发送函数调用了设备接口层(相当于网络模型中的链路层)的发送函数dev_queue_xmit()。

  该函数的调用关系如下:

/*  * Send (or queue for sending) a packet.  *  * IMPORTANT: When this is called to resend frames. The caller MUST  * already have locked the sk_buff. Apart from that we do the  * rest of the magic.  */

void dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri) {  unsigned long flags;  int nitcount;  struct packet_type *ptype;  int where = 0;  /* used to say if the packet should go */     /* at the front or the back of the */     /* queue - front is a retransmit try */     /* where=0 表示是刚从上层传递的新数据包;where=1 表示从硬件队列中取出的数据包*/

 if (dev == NULL)  {   printk("dev.c: dev_queue_xmit: dev = NULL ");   return;  }    if(pri>=0 && !skb_device_locked(skb))//锁定该skb再进行操作,避免造成内核的不一致情况   skb_device_lock(skb); /* Shove a lock on the frame */ #ifdef CONFIG_SLAVE_BALANCING  save_flags(flags);  cli();  if(dev->slave!=NULL && dev->slave->pkt_queue < dev->pkt_queue &&     (dev->slave->flags & IFF_UP))   dev=dev->slave;  restore_flags(flags); #endif  #ifdef CONFIG_SKB_CHECK  IS_SKB(skb); #endif     skb->dev = dev;

 /*   * This just eliminates some race conditions, but not all...   */

 if (skb->next != NULL) //这种条件似乎永远不能成立,因为发送数据包前,数据包已经从缓存队列摘下  {//以防内核代码有BUG   /*    * Make sure we haven't missed an interrupt.    */   printk("dev_queue_xmit: worked around a missed interrupt ");   start_bh_atomic();   dev->hard_start_xmit(NULL, dev);   end_bh_atomic();   return;    }

 /*   * Negative priority is used to flag a frame that is being pulled from the   * queue front as a retransmit attempt. It therefore goes back on the queue   * start on a failure.   */      if (pri < 0) //优先级小于0表示是从硬件队列中取出的数据包    {   pri = -pri-1;   where = 1;    }

 if (pri >= DEV_NUMBUFFS)  {   printk("bad priority in dev_queue_xmit. ");   pri = 1;  }

 /*   * If the address has not been resolved. Call the device header rebuilder.   * This can cover all protocols and technically not just ARP either.   */    if (!skb->arp && dev->rebuild_header(skb->data, dev, skb->raddr, skb)) {//用于ARP协议,并重建MAC帧首部   return;  }

 save_flags(flags);  cli();  if (!where) {//表示是新数据包,需要将其加入设备队列中 #ifdef CONFIG_SLAVE_BALANCING   skb->in_dev_queue=1;//该数据包在设备队列 #endif    skb_queue_tail(dev->buffs + pri,skb);//将发送数据包加入硬件队列   skb_device_unlock(skb);  /* Buffer is on the device queue and can be freed safely */   skb = skb_dequeue(dev->buffs + pri);//从硬件队列中取出一个数据包   skb_device_lock(skb);  /* New buffer needs locking down */ #ifdef CONFIG_SLAVE_BALANCING    skb->in_dev_queue=0; #endif   }  restore_flags(flags);

 /* copy outgoing packets to any sniffer packet handlers */  if(!where)//对于新的数据包,则遍历网络层协议队列,内核支持混杂模式  {   for (nitcount= dev_nit, ptype = ptype_base; nitcount > 0 && ptype != NULL; ptype = ptype->next)   {    /* Never send packets back to the socket     * they originated from - MvS (miquels@drinkel.ow.org)     */    if (ptype->type == htons(ETH_P_ALL) &&       (ptype->dev == dev || !ptype->dev) &&       ((struct sock *)ptype->data != skb->sk))    {     struct sk_buff *skb2;     if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)      break;     /*      * The protocol knows this has (for other paths) been taken off      * and adds it back.      */     skb2->len-=skb->dev->hard_header_len;     ptype->func(skb2, skb->dev, ptype);//IP层函数对应func为ip_rcv(),将发送的数据回送一份给对应的网络层协议     nitcount--;//用于及时退出循环    }   }  }  start_bh_atomic();//开始原子操作  if (dev->hard_start_xmit(skb, dev) == 0) {//调用硬件的发送函数发送数据   end_bh_atomic();//结束原子操作   /*    * Packet is now solely the responsibility of the driver    */   return;//到这里说明数据包成功发送  }  //数据包没有成功发送,进行处理,将数据包从新加入硬件队列  end_bh_atomic();

 /*   * Transmission failed, put skb back into a list. Once on the list it's safe and   * no longer device locked (it can be freed safely from the device queue)   */  cli(); #ifdef CONFIG_SLAVE_BALANCING  skb->in_dev_queue=1;  dev->pkt_queue++; #endif   skb_device_unlock(skb);//对SKB解锁  skb_queue_head(dev->buffs + pri,skb);//这次采用头插法插入硬件发送队列  restore_flags(flags); }  

  具体的硬件发送函数dev->hard_start_xmit的实现将在下篇博文中分析。

  本文转自:http://blog.csdn.net/yming0221/article/details/7492423



ip ip协议 Linux

需要 登录 后方可回复, 如果你还没有账号请 注册新账号