[sr-dev] git:andrei/raw_sock: raw sockets: build ip header & fragmentation support

Andrei Pelinescu-Onciul andrei at iptel.org
Tue Jun 15 16:36:03 CEST 2010


Module: sip-router
Branch: andrei/raw_sock
Commit: 869a731313e489254fb220ce52120b7682a2526c
URL:    http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=869a731313e489254fb220ce52120b7682a2526c

Author: Andrei Pelinescu-Onciul <andrei at iptel.org>
Committer: Andrei Pelinescu-Onciul <andrei at iptel.org>
Date:   Tue Jun 15 16:27:55 2010 +0200

raw sockets: build ip header & fragmentation support

Support for sending on raw sockets with IP_HDRINCL or IPPROTO_RAW,
that require building the IP header and fragmentation.

---

 raw_sock.c |  165 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 raw_sock.h |    6 ++-
 2 files changed, 164 insertions(+), 7 deletions(-)

diff --git a/raw_sock.c b/raw_sock.c
index e8b90b1..136f595 100644
--- a/raw_sock.c
+++ b/raw_sock.c
@@ -24,11 +24,12 @@
  * History:
  * --------
  *  2010-06-07  initial version (from older code) andrei
+ *  2010-06-15  IP_HDRINCL raw socket support, including on-send
+ *               fragmentation (andrei)
  */
 /*
  * FIXME: IP_PKTINFO & IP_HDRINCL - linux specific
  * FIXME: linux specific iphdr and udphdr
- * FIXME: send support for IP_HDRINCL
  */
 
 #ifdef USE_RAW_SOCKS
@@ -37,7 +38,8 @@
 #include "ip_addr.h"
 #include "dprint.h"
 #include "str.h"
-#include "ut.h"
+#include "rand/fastrand.h"
+#include "globals.h"
 
 #include <errno.h>
 #include <string.h>
@@ -99,7 +101,7 @@ int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
 	}
 	t=IP_PMTUDISC_DONT;
 	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
-		LOG(L_ERR, "raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
+		ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
 				strerror(errno));
 		goto error;
 	}
@@ -275,7 +277,7 @@ int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
 			n=-3;
 			goto error;
 		}else{
-			LOG(L_ERR, "udp length too small: %d/%d\n",
+			ERR("udp length too small: %d/%d\n",
 					(int)udp_len, (int)(end-udph_start));
 			n=-3;
 			goto error;
@@ -290,7 +292,7 @@ int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
 	dst_ip.u.addr32[0]=iph.daddr;
 	/* fill dst_port */
 	dst_port=ntohs(udph.dest);
-	ip_addr2su(to, &dst_ip, port);
+	ip_addr2su(to, &dst_ip, dst_port);
 	/* fill src_port */
 	src_port=ntohs(udph.source);
 	su_setport(from, src_port);
@@ -403,6 +405,34 @@ inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
 
 
 
+/** fill in an ip header (20 bytes, no options, not fragmented).
+ * Note: the checksum is _not_ computed (the check field is set to 0).
+ * @param iph - ip header that will be filled.
+ * @param from - source ip v4 address (network byte order).
+ * @param to -   destination ip v4 address (network byte order).
+ * @param payload_len - payload length (not including the ip header).
+ * @param proto - protocol.
+ * @return 0 (always, it cannot fail).
+ */
+inline static int mk_ip_hdr(struct iphdr* iph, struct in_addr* from,
+				struct in_addr* to, int payload_len, unsigned char proto)
+{
+	iph->ihl = sizeof(struct iphdr)/4; /* header length in 32 bit words */
+	iph->version = 4;
+	iph->tos = tos;
+	iph->tot_len = htons(payload_len + sizeof(struct iphdr)); /* hdr + data */
+	iph->id = 0; /* 0 here; a caller that fragments must set its own id */
+	iph->frag_off = 0; /* first 3 bits = flags = 0, last 13 bits = offset */
+	iph->ttl = 63; /* FIXME: use some configured value */
+	iph->protocol = proto;
+	iph->check = 0;
+	iph->saddr = from->s_addr;
+	iph->daddr = to->s_addr;
+	return 0;
+}
+
+
+
 /** send an udp packet over a raw socket.
  * @param rsock - raw socket
  * @param buf - data
@@ -413,7 +443,8 @@ inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
  * @return  <0 on error (errno set too), number of bytes sent on success
  *          (including the udp header => on success len + udpheader size).
  */
-int raw_udp4_send(int rsock, char* buf, int len, union sockaddr_union* from,
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
+					union sockaddr_union* from,
 					union sockaddr_union* to)
 {
 	struct msghdr snd_msg;
@@ -453,4 +484,126 @@ int raw_udp4_send(int rsock, char* buf, int len, union sockaddr_union* from,
 
 
 
+/** send an udp packet over an IP_HDRINCL raw socket.
+ * If needed, send several fragments.
+ * @param rsock - raw socket
+ * @param buf - data
+ * @param len - data len
+ * @param from - source address:port (_must_ be non-null, but the ip address
+ *                can be 0, in which case it will be filled by the kernel).
+ * @param to - destination address:port
+ * @param mtu - maximum datagram size (including the ip header, excluding
+ *              link layer headers). It is used only if greater than 28
+ *               (sizeof(ip_header + udp_header)); lower values are
+ *               ignored (the packet will be sent un-fragmented).
+ *              0 can be used to disable fragmentation.
+ * @return  <0 on error (-2: datagram too big, -1: check errno),
+ *          number of bytes sent on success (data + udp header + the ip
+ *          header of each packet sent => when fragmenting, the sum
+ *          over all the fragments).
+ */
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
+						union sockaddr_union* from,
+						union sockaddr_union* to, unsigned short mtu)
+{
+	struct msghdr snd_msg;
+	struct iovec iov[2];
+	struct ip_udp_hdr {
+		struct iphdr ip;
+		struct udphdr udp;
+	} hdr;
+	unsigned int totlen;
+	unsigned int ip_frag_size; /* fragment size */
+	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
+	unsigned int ip_payload;
+	unsigned int last_frag_offs;
+	void* last_frag_start;
+	int frg_no;
+	int bytes_sent; /* bytes sent by the previous fragments */
+	int ret;
+
+	/* tot_len has only 16 bits; test len first so the sum cannot wrap */
+	if (unlikely(len > 65535 - sizeof(hdr)))
+		return -2;
+	totlen = len + sizeof(hdr);
+	memset(&snd_msg, 0, sizeof(snd_msg));
+	snd_msg.msg_name=&to->sin;
+	snd_msg.msg_namelen=sockaddru_len(*to);
+	snd_msg.msg_iov=&iov[0];
+	/* prepare the udp & ip headers */
+	mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
+	mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
+				len + sizeof(hdr.udp), IPPROTO_UDP);
+	hdr.ip.tot_len = htons(totlen); /* whole datagram, ip header included */
+	iov[0].iov_base=(char*)&hdr;
+	iov[0].iov_len=sizeof(hdr);
+	snd_msg.msg_iovlen=2;
+	snd_msg.msg_control=0;
+	snd_msg.msg_controllen=0;
+	snd_msg.msg_flags=0;
+	/* this part changes for different fragments */
+	/* packets are fragmented if mtu has a valid value (at least an
+	   IP header + UDP header fit in it) and if the total length is greater
+	   than the mtu */
+	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
+		iov[1].iov_base=buf;
+		iov[1].iov_len=len;
+		ret=sendmsg(rsock, &snd_msg, 0);
+	} else {
+		ip_payload = len + sizeof(hdr.udp);
+		/* a fragment offset must be a multiple of 8 => its size must
+		   also be a multiple of 8, except for the last fragment */
+		ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
+		last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
+		frg_no = ip_payload / ip_frag_size +
+				 ((ip_payload % ip_frag_size) > last_frag_extra);
+		last_frag_offs = (frg_no - 1) * ip_frag_size;
+		/* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
+		   => last_frag_offs >= sizeof(hdr.udp) */
+		last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
+		hdr.ip.id = fastrand_max(65534) + 1; /* random id, should be != 0
+											  (if 0 the kernel will fill it) */
+		/* send the first fragment */
+		iov[1].iov_base=buf;
+		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
+		   if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
+		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
+		hdr.ip.tot_len = htons(sizeof(hdr.ip) + ip_frag_size);
+		hdr.ip.frag_off = htons(0x2000); /* set MF */
+		ret=sendmsg(rsock, &snd_msg, 0);
+		if (unlikely(ret < 0))
+			goto end;
+		bytes_sent = ret;
+		/* all the other fragments, include only the ip header */
+		iov[0].iov_len = sizeof(hdr.ip);
+		iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
+		/* fragments between the first and the last (MF set) */
+		while(unlikely(iov[1].iov_base < last_frag_start)) {
+			iov[1].iov_len = ip_frag_size;
+			hdr.ip.tot_len = htons(sizeof(hdr.ip) + iov[1].iov_len);
+			hdr.ip.frag_off = htons( (unsigned short)
+									(((char*)iov[1].iov_base - (char*)buf +
+										sizeof(hdr.udp)) / 8) | 0x2000);
+			ret=sendmsg(rsock, &snd_msg, 0);
+			if (unlikely(ret < 0))
+				goto end;
+			bytes_sent += ret;
+			iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
+		}
+		/* last fragment: whatever is left, MF is not set */
+		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
+		hdr.ip.tot_len = htons(sizeof(hdr.ip) + iov[1].iov_len);
+		hdr.ip.frag_off = htons( (unsigned short)
+								(((char*)iov[1].iov_base - (char*)buf +
+									sizeof(hdr.udp)) / 8) );
+		ret=sendmsg(rsock, &snd_msg, 0);
+		if (likely(ret >= 0))
+			ret += bytes_sent; /* report the total over all the fragments */
+	}
+end:
+	return ret;
+}
+
+
+
 #endif /* USE_RAW_SOCKS */
diff --git a/raw_sock.h b/raw_sock.h
index b0e5653..6be0aba 100644
--- a/raw_sock.h
+++ b/raw_sock.h
@@ -46,7 +46,11 @@ int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
 					union sockaddr_union* to);
 int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
 					union sockaddr_union* to, struct raw_filter* rf);
-int raw_udp4_send(int rsock, char* buf, int len, union sockaddr_union* from,
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
+					union sockaddr_union* from,
 					union sockaddr_union* to);
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
+						union sockaddr_union* from,
+						union sockaddr_union* to, unsigned short mtu);
 
 #endif /* _raw_sock_h */




More information about the sr-dev mailing list