pcap2corpus.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. /*-
  2. * Copyright (c) 2017 Michael Tuexen
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24. * SUCH DAMAGE.
  25. *
  26. */
  /*
   * Compile: cc -Wall -Werror -pedantic pcap2corpus.c -lpcap -o pcap2corpus
   *
   * Usage: pcap2corpus infile outfile_prefix [expression]
   * If no expression (a pcap filter) is provided, "sctp" is used.
   */
  33. #define _GNU_SOURCE
  34. #include <sys/types.h>
  35. #include <net/ethernet.h>
  36. #include <netinet/in.h>
  37. #include <netinet/ip.h>
  38. #include <netinet/ip6.h>
  39. #include <pcap/pcap.h>
  40. #include <stdio.h>
  41. #include <stdlib.h>
  42. #include <string.h>
  /*
   * Running totals, printed by main() once the capture has been consumed.
   * Objects with static storage duration start out as zero.
   */
  static unsigned long nr_read;   /* packets handed to packet_handler() */
  static unsigned long nr_decaps; /* payloads written out as files */

  /* Number of zero bytes written in front of each payload. */
  #define PRE_PADDING 1
  46. struct args {
  47. struct bpf_program bpf_prog;
  48. char *filename_prefix;
  49. int (*is_ipv4)(const void *);
  50. int (*is_ipv6)(const void *);
  51. int linktype;
  52. unsigned int offset;
  53. };
  /*
   * SCTP common header as specified by RFC 4960.  The code visible in this
   * file only uses its size, to step over it and reach the chunks.
   */
  struct sctphdr {
      uint16_t src_port;  /* source port */
      uint16_t dest_port; /* destination port */
      uint32_t v_tag;     /* verification tag of the packet */
      uint32_t checksum;  /* CRC32C checksum */
      /* the chunks start right after this header */
  } __attribute__((packed));
  /*
   * Report whether a DLT_NULL (BSD loopback) frame carries IPv4.
   *
   * bytes points at the 4-byte address family word that starts the frame;
   * the caller guarantees that at least 4 bytes are readable.
   * Returns 1 if the family is AF_INET (2), 0 otherwise.
   *
   * The word is copied out with memcpy() because packet data carries no
   * alignment guarantee.  It is stored in the byte order of the host that
   * made the capture, which may differ from ours, so the byte-swapped
   * representation is accepted as well.
   */
  static int
  loopback_is_ipv4(const void *bytes)
  {
      uint32_t family;

      memcpy(&family, bytes, sizeof(family));
      return (family == UINT32_C(2) || family == UINT32_C(0x02000000));
  }
  /*
   * Report whether a DLT_NULL (BSD loopback) frame carries IPv6.
   *
   * bytes points at the 4-byte address family word that starts the frame;
   * the caller guarantees that at least 4 bytes are readable.
   * Returns 1 if the family is one of the values 24, 28 or 30 (AF_INET6 is
   * not the same number on every operating system), 0 otherwise.
   *
   * The word is copied out with memcpy() because packet data carries no
   * alignment guarantee.  It is stored in the byte order of the host that
   * made the capture, which may differ from ours, so the byte-swapped
   * representations are accepted as well.
   */
  static int
  loopback_is_ipv6(const void *bytes)
  {
      uint32_t family;

      memcpy(&family, bytes, sizeof(family));
      switch (family) {
      case UINT32_C(24):
      case UINT32_C(28):
      case UINT32_C(30):
      case UINT32_C(0x18000000): /* 24, opposite byte order */
      case UINT32_C(0x1c000000): /* 28, opposite byte order */
      case UINT32_C(0x1e000000): /* 30, opposite byte order */
          return (1);
      default:
          return (0);
      }
  }
  /*
   * Report whether an Ethernet frame carries IPv4, judged by the EtherType
   * field of the header that bytes points at.
   * Returns 1 if the EtherType is ETHERTYPE_IP, 0 otherwise.
   */
  static int
  ethernet_is_ipv4(const void *bytes)
  {
      const struct ether_header *frame = (const struct ether_header *)bytes;
      const uint16_t ether_type = ntohs(frame->ether_type);

      if (ether_type != ETHERTYPE_IP) {
          return (0);
      }
      return (1);
  }
  /*
   * Report whether an Ethernet frame carries IPv6, judged by the EtherType
   * field of the header that bytes points at.
   * Returns 1 if the EtherType is ETHERTYPE_IPV6, 0 otherwise.
   */
  static int
  ethernet_is_ipv6(const void *bytes)
  {
      const struct ether_header *frame = (const struct ether_header *)bytes;
      const uint16_t ether_type = ntohs(frame->ether_type);

      if (ether_type != ETHERTYPE_IPV6) {
          return (0);
      }
      return (1);
  }
  92. static void
  93. packet_handler(u_char *user, const struct pcap_pkthdr *pkthdr, const u_char *bytes_in)
  94. {
  95. struct args *args;
  96. const u_char *bytes_out;
  97. FILE *file;
  98. char *filename;
  99. const struct ip *ip4_hdr_in;
  100. const struct ip6_hdr *ip6_hdr_in;
  101. size_t offset, length;
  102. int null = 0;
  103. args = (struct args *)(void *)user;
  104. bytes_out = NULL;
  105. if (pcap_offline_filter(&args->bpf_prog, pkthdr, bytes_in) == 0) {
  106. goto out;
  107. }
  108. if (pkthdr->caplen < args->offset) {
  109. goto out;
  110. }
  111. if (args->is_ipv4(bytes_in)) {
  112. offset = args->offset + sizeof(struct ip) + sizeof(struct sctphdr);
  113. if (pkthdr->caplen < offset) {
  114. goto out;
  115. }
  116. ip4_hdr_in = (const struct ip *)(const void *)(bytes_in + args->offset);
  117. if (ip4_hdr_in->ip_p == IPPROTO_SCTP) {
  118. unsigned int ip4_hdr_len;
  119. ip4_hdr_len = ip4_hdr_in->ip_hl << 2;
  120. offset = args->offset + ip4_hdr_len + sizeof(struct sctphdr);
  121. if (pkthdr->caplen < offset) {
  122. goto out;
  123. }
  124. bytes_out = bytes_in + offset;
  125. length = pkthdr->caplen - offset;
  126. }
  127. }
  128. if (args->is_ipv6(bytes_in)) {
  129. offset = args->offset + sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
  130. if (pkthdr->caplen < offset) {
  131. goto out;
  132. }
  133. ip6_hdr_in = (const struct ip6_hdr *)(bytes_in + args->offset);
  134. if (ip6_hdr_in->ip6_nxt == IPPROTO_SCTP) {
  135. bytes_out = bytes_in + offset;
  136. length = pkthdr->caplen - offset;
  137. }
  138. }
  139. out:
  140. nr_read++;
  141. if (bytes_out != NULL) {
  142. if (asprintf(&filename, "%s-%06lu", args->filename_prefix, nr_decaps) < 0) {
  143. return;
  144. }
  145. file = fopen(filename, "w");
  146. fwrite(&null, 1, PRE_PADDING, file);
  147. fwrite(bytes_out, length, 1, file);
  148. fclose(file);
  149. free(filename);
  150. nr_decaps++;
  151. }
  152. }
  /*
   * Build the pcap filter expression from the command line.
   *
   * argc/argv are the arguments of main(); the words of the expression start
   * at argv[3].  They are joined with single spaces.  If there are none, the
   * default expression "sctp" is used.
   *
   * Returns a NUL-terminated string allocated with malloc(), which the
   * caller must free(), or NULL if the allocation fails.
   */
  static char *
  get_filter(int argc, char *argv[])
  {
      static const char default_filter[] = "sctp";
      char *result, *c;
      size_t len, arg_len;
      int i;

      if (argc <= 3) {
          /* sizeof includes the terminating NUL. */
          result = malloc(sizeof(default_filter));
          if (result != NULL) {
              memcpy(result, default_filter, sizeof(default_filter));
          }
          return (result);
      }
      /* Each word needs room for itself plus one separator or the NUL. */
      len = 0;
      for (i = 3; i < argc; i++) {
          len += strlen(argv[i]) + 1;
      }
      result = malloc(len);
      if (result == NULL) {
          return (NULL);
      }
      c = result;
      for (i = 3; i < argc; i++) {
          arg_len = strlen(argv[i]);
          memcpy(c, argv[i], arg_len);
          c += arg_len;
          *c++ = (i < argc - 1) ? ' ' : '\0';
      }
      return (result);
  }
  179. int
  180. main(int argc, char *argv[])
  181. {
  182. char errbuf[PCAP_ERRBUF_SIZE];
  183. pcap_t *pcap_reader;
  184. char *filter;
  185. struct args args;
  186. if (argc < 3) {
  187. fprintf(stderr, "Usage: %s infile outfile_prefix [expression]\n", argv[0]);
  188. return (-1);
  189. }
  190. args.filename_prefix = argv[2];
  191. pcap_reader = pcap_open_offline(argv[1], errbuf);
  192. if (pcap_reader == NULL) {
  193. fprintf(stderr, "Can't open input file %s: %s\n", argv[1], errbuf);
  194. return (-1);
  195. }
  196. args.linktype = pcap_datalink(pcap_reader);
  197. switch (args.linktype) {
  198. case DLT_NULL:
  199. args.is_ipv4 = loopback_is_ipv4;
  200. args.is_ipv6 = loopback_is_ipv6;
  201. args.offset = sizeof(uint32_t);
  202. break;
  203. case DLT_EN10MB:
  204. args.is_ipv4 = ethernet_is_ipv4;
  205. args.is_ipv6 = ethernet_is_ipv6;
  206. args.offset = sizeof(struct ether_header);
  207. break;
  208. default:
  209. fprintf(stderr, "Datalink type %d not supported\n", args.linktype);
  210. pcap_close(pcap_reader);
  211. return (-1);
  212. }
  213. filter = get_filter(argc, argv);
  214. if (pcap_compile(pcap_reader, &args.bpf_prog, filter, 1, PCAP_NETMASK_UNKNOWN) < 0) {
  215. fprintf(stderr, "Can't compile filter %s: %s\n", filter, pcap_geterr(pcap_reader));
  216. free(filter);
  217. pcap_close(pcap_reader);
  218. return (-1);
  219. }
  220. free(filter);
  221. pcap_dispatch(pcap_reader, 0, packet_handler, (u_char *)&args);
  222. pcap_close(pcap_reader);
  223. fprintf(stderr, "%lu packets processed\n", nr_read);
  224. fprintf(stderr, "%lu packets decapsulated\n", nr_decaps);
  225. return (0);
  226. }