bpf/profiling/network/protocol_analyze.h - skywalking-rover - Git at Google

 /*
  * This code runs using bpf in the Linux kernel.
  * Copyright 2018- The Pixie Authors.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
  * SPDX-License-Identifier: GPL-2.0
  */

 #include "common.h"

 #pragma once

 // Application-layer protocols that the inference code below can attribute to a connection.
 // Every member carries an explicitly assigned numeric value.
 enum traffic_protocol_t {
     kProtocolUnknown = 0,   // no protocol recognised
     kProtocolHTTP    = 1,   // HTTP/1.x
     kProtocolHTTP2   = 2,   // HTTP/2
     kProtocolMySQL   = 3,   // MySQL
     kProtocolCQL     = 4,   // Cassandra CQL
     kProtocolPGSQL   = 5,   // PostgreSQL
     kProtocolDNS     = 6,   // DNS
     kProtocolRedis   = 7,   // Redis
     kProtocolNATS    = 8,   // NATS
     kProtocolMongo   = 9,   // MongoDB
     kProtocolKafka   = 10,  // Kafka
     kProtocolMux     = 11,  // Mux
 };

 // Role of an inspected message as returned by the infer_* functions.
 enum message_type_t {
     kUnknown = 0,   // could not be classified
     kRequest = 1,   // classified as a request
     kResponse = 2,  // classified as a response
 };

 // Result of one protocol-inference pass: which protocol matched and the message role.
 struct protocol_message_t {
     __u32 protocol;  // holds an enum traffic_protocol_t value
     __u32 type;      // holds an enum message_type_t value
 };

 // Reads sizeof(value) bytes from `ptr` into the lvalue `value` using bpf_probe_read.
 // Both arguments are parenthesised in the expansion so that expression arguments
 // (e.g. `buf + 4`) cannot change meaning through operator precedence.
 #define BPF_PROBE_READ_VAR1(value, ptr) bpf_probe_read(&(value), sizeof(value), (ptr))

 // Copies 4 bytes from `buf` with bpf_probe_read and converts them from
 // network (big-endian) byte order with bpf_ntohl.
 static __inline int32_t read_big_endian_int32(const char* buf) {
     int32_t raw;
     bpf_probe_read(&raw, sizeof(raw), buf);
     return bpf_ntohl(raw);
 }

 // Copies 2 bytes from `buf` with bpf_probe_read and interprets them as a signed
 // big-endian 16-bit integer. The result is sign-extended into the int32_t return value.
 //
 // The bytes are combined explicitly instead of running a 32-bit byte swap over a 16-bit
 // value: a 32-bit swap moves the payload into the upper half of the word, so callers that
 // store the result in an int16_t would only ever see the sign-fill bits (0 or -1).
 static __inline int32_t read_big_endian_int16(const char* buf) {
     uint8_t bytes[2] = {0, 0};
     BPF_PROBE_READ_VAR1(bytes, buf);
     // bytes[0] is the most significant byte in network order.
     return (int16_t)(((uint16_t)bytes[0] << 8) | bytes[1]);
 }

 // Compares the first `n` bytes of `lhs` and `rhs`.
 // Returns 0 when all `n` bytes are equal (including n == 0) and 1 at the first mismatch.
 // Unlike strncmp, a NUL byte does not stop the comparison and no ordering is reported.
 static __inline int px_bpf_strncmp(const char* lhs, size_t n, const char* rhs) {
     size_t idx = 0;
     while (idx < n) {
         if (lhs[idx] != rhs[idx]) {
             return 1;
         }
         idx++;
     }
     return 0;
 }

 // Infers whether `buf` starts an HTTP/1.x message.
 //
 // Returns kResponse when the buffer starts with "HTTP", kRequest when it starts with one of
 // the recognised method names, and kUnknown otherwise (or when fewer than 16 bytes are given).
 static __inline __u32 infer_http_message(const char* buf, size_t count) {
     // Smallest HTTP response is 17 characters:
     // HTTP/1.1 200 OK\r\n
     // Smallest HTTP request is 16 characters:
     // GET x HTTP/1.1\r\n
     if (count < 16) {
         return kUnknown;
     }

     // Status line of a response.
     if (buf[0] == 'H' && buf[1] == 'T' && buf[2] == 'T' && buf[3] == 'P') {
         return kResponse;
     }

     // Request methods; only the leading characters of the method name are compared.
     if (buf[0] == 'G' && buf[1] == 'E' && buf[2] == 'T') {
         return kRequest;
     }
     if (buf[0] == 'H' && buf[1] == 'E' && buf[2] == 'A' && buf[3] == 'D') {
         return kRequest;
     }
     if (buf[0] == 'P' && buf[1] == 'O' && buf[2] == 'S' && buf[3] == 'T') {
         return kRequest;
     }
     if (buf[0] == 'P' && buf[1] == 'U' && buf[2] == 'T') {
         return kRequest;
     }
     if (buf[0] == 'D' && buf[1] == 'E' && buf[2] == 'L' && buf[3] == 'E' && buf[4] == 'T' && buf[5] == 'E') {
         return kRequest;
     }
     // "CONNECT" is spelled C-O-N-N-E-C-T; the sixth byte is 'C', not 'T'.
     if (buf[0] == 'C' && buf[1] == 'O' && buf[2] == 'N' && buf[3] == 'N' && buf[4] == 'E' && buf[5] == 'C' &&
         buf[6] == 'T') {
         return kRequest;
     }
     // Matches the "OPTION" prefix of OPTIONS.
     if (buf[0] == 'O' && buf[1] == 'P' && buf[2] == 'T' && buf[3] == 'I' && buf[4] == 'O' && buf[5] == 'N') {
         return kRequest;
     }
     if (buf[0] == 'T' && buf[1] == 'R' && buf[2] == 'A' && buf[3] == 'C' && buf[4] == 'E') {
         return kRequest;
     }
     if (buf[0] == 'P' && buf[1] == 'A' && buf[2] == 'T' && buf[3] == 'C' && buf[4] == 'H') {
         return kRequest;
     }

     return kUnknown;
 }

 // Infers whether `buf_src` holds HTTP/2 frames and, from the first HEADERS frame found,
 // whether the traffic is a request or a response.
 // Frame format: https://www.rfc-editor.org/rfc/rfc7540.html#section-4.1
 //
 // Up to kFrameLoopCount frames are walked. For a HEADERS frame, the first bytes of the header
 // block fragment are scanned for an HPACK static-table index (low 7 bits of the byte):
 // indexes 1-5 (the :authority/:method/:path entries) yield kRequest, any other static-table
 // index yields kResponse. Returns kUnknown when nothing conclusive is found.
 static __inline __u32 infer_http2_message(const char* buf_src, size_t count) {
 static const uint8_t kFrameBasicSize = 0x9; // including Length, Type, Flags, Reserved, Stream Identity
 static const uint8_t kFrameTypeHeader = 0x1; // the type of the frame: https://www.rfc-editor.org/rfc/rfc7540.html#section-6.2
 static const uint8_t kFrameLoopCount = 5;

 static const uint8_t kStaticTableMaxSize = 61;// https://www.rfc-editor.org/rfc/rfc7541#appendix-A
 static const uint8_t kStaticTableAuth = 1;
 static const uint8_t kStaticTableGet = 2;
 static const uint8_t kStaticTablePost = 3;
 static const uint8_t kStaticTablePath1 = 4;
 static const uint8_t kStaticTablePath2 = 5;

     // the buffer must be at least as large as the basic frame header
     if (count < kFrameBasicSize) {
         return kUnknown;
     }

     // frame info: 9-byte frame header plus the first bytes of the payload
     __u8 frame[21] = { 0 };
     __u32 frameOffset = 0;
     // header info
     __u8 staticInx, headerBlockFragmentOffset;

     // walk the frames one by one
 #pragma unroll
     for (__u8 i = 0; i < kFrameLoopCount; i++) {
         if (frameOffset >= count) {
             break;
         }

         // read the current frame, then advance the offset to the next frame:
         // the first three bytes are the big-endian payload length
         bpf_probe_read(frame, sizeof(frame), buf_src + frameOffset);
         frameOffset += (bpf_ntohl(*(__u32 *) frame) >> 8) + kFrameBasicSize;

         // only HEADERS frames are inspected
         if (frame[3] != kFrameTypeHeader) {
             continue;
         }

         // Flag bits that are not defined for HEADERS frames (0xd2) must be unset,
         // otherwise this is not treated as HTTP/2.
         // NOTE(review): the second test uses mask 0x01 on the first stream-identifier byte;
         // if it is meant to test the reserved bit, RFC 7540 places that at 0x80 - confirm.
         if ((frame[4] & 0xd2) || frame[5] & 0x01) {
             return kUnknown;
         }

         // locate the header block fragment offset (RFC 7540 section 6.2)
         headerBlockFragmentOffset = kFrameBasicSize;
         if (frame[4] & 0x08) {  // PADDED flag is set: 1-byte Pad Length field
             headerBlockFragmentOffset += 1;
         }
         if (frame[4] & 0x20) {  // PRIORITY flag is set: 4-byte dependency + 1-byte weight
             headerBlockFragmentOffset += 5;
         }

         // The largest index read below is 9 + 1 + 5 + 5 = 20, which stays inside frame[21].
 #pragma unroll
         for (__u8 j = 0; j <= kStaticTablePath2; j++) {
             if (headerBlockFragmentOffset > count) {
                 return kUnknown;
             }
             staticInx = frame[headerBlockFragmentOffset] & 0x7f;
             if (staticInx <= kStaticTableMaxSize && staticInx > 0) {
                 if (staticInx == kStaticTableAuth ||
                     staticInx == kStaticTableGet ||
                     staticInx == kStaticTablePost ||
                     staticInx == kStaticTablePath1 ||
                     staticInx == kStaticTablePath2) {
                     return kRequest;
                 } else {
                     return kResponse;
                 }
             }
             headerBlockFragmentOffset++;
         }
     }

     return kUnknown;
 }

 // Cassandra frame:
 //      0         8        16        24        32         40
 //      +---------+---------+---------+---------+---------+
 //      | version |  flags  |      stream       | opcode  |
 //      +---------+---------+---------+---------+---------+
 //      |                length                 |
 //      +---------+---------+---------+---------+
 //      |                                       |
 //      .            ...  body ...              .
 //      .                                       .
 //      .                                       .
 //      +----------------------------------------
 static __inline enum message_type_t infer_cql_message(const char* buf, size_t count) {
 static const uint8_t kError = 0x00;
 static const uint8_t kStartup = 0x01;
 static const uint8_t kReady = 0x02;
 static const uint8_t kAuthenticate = 0x03;
 static const uint8_t kOptions = 0x05;
 static const uint8_t kSupported = 0x06;
 static const uint8_t kQuery = 0x07;
 static const uint8_t kResult = 0x08;
 static const uint8_t kPrepare = 0x09;
 static const uint8_t kExecute = 0x0a;
 static const uint8_t kRegister = 0x0b;
 static const uint8_t kEvent = 0x0c;
 static const uint8_t kBatch = 0x0d;
 static const uint8_t kAuthChallenge = 0x0e;
 static const uint8_t kAuthResponse = 0x0f;
 static const uint8_t kAuthSuccess = 0x10;

 // Cassandra frames have a 9-byte header.
 if (count < 9) {
 return kUnknown;
 }

 // Version contains both version and direction.
 bool request = (buf[0] & 0x80) == 0x00;
 uint8_t version = (buf[0] & 0x7f);
 uint8_t flags = buf[1];
 uint8_t opcode = buf[4];
 int32_t length = read_big_endian_int32(&buf[5]);

 // Cassandra version should 5 or less. Also v2 and lower seem much less popular.
 // For example ScyllaDB only supports v3+.
 if (version < 3 || version > 5) {
 return kUnknown;
 }

 // Only flags 0x1, 0x2, 0x4 and 0x8 are used.
 if ((flags & 0xf0) != 0) {
 return kUnknown;
 }

 // A frame is limited to 256MB in length,
 // but we look for more common frames which should be much smaller in size.
 if (length > 10000) {
 return kUnknown;
 }

 switch (opcode) {
 case kStartup:
 case kOptions:
 case kQuery:
 case kPrepare:
 case kExecute:
 case kRegister:
 case kBatch:
 case kAuthResponse:
   return request ? kRequest : kUnknown;
 case kError:
 case kReady:
 case kAuthenticate:
 case kSupported:
 case kResult:
 case kEvent:
 case kAuthChallenge:
 case kAuthSuccess:
   return !request ? kResponse : kUnknown;
 default:
   return kUnknown;
 }
 }

 // Infers a MongoDB request from the 16-byte message header, read as four native-order
 // int32 words: message length, request id, response-to and opcode.
 //
 // Reference:
 // https://docs.mongodb.com/manual/reference/mongodb-wire-protocol/#std-label-wp-request-opcodes.
 // Note: Response side inference for Mongo is not robust, and is not attempted to avoid
 // confusion with other protocols, especially MySQL.
 //
 // Returns kRequest for a known opcode with response-to == 0, kUnknown otherwise.
 static __inline enum message_type_t infer_mongo_message(const char* buf, size_t count) {
     enum { kMongoHeaderLength = 16 };
     enum {
         kOPUpdate = 2001,
         kOPInsert = 2002,
         kReserved = 2003,
         kOPQuery = 2004,
         kOPGetMore = 2005,
         kOPDelete = 2006,
         kOPKillCursors = 2007,
         kOPCompressed = 2012,
         kOPMsg = 2013,
     };

     if (count < kMongoHeaderLength) {
         return kUnknown;
     }

     const int32_t* words = (const int32_t*)buf;

     // The message length includes the header itself.
     const int32_t message_length = words[0];
     if (message_length < kMongoHeaderLength) {
         return kUnknown;
     }

     const int32_t request_id = words[1];
     if (request_id < 0) {
         return kUnknown;
     }

     const int32_t response_to = words[2];
     const int32_t opcode = words[3];

     switch (opcode) {
     case kOPUpdate:
     case kOPInsert:
     case kReserved:
     case kOPQuery:
     case kOPGetMore:
     case kOPDelete:
     case kOPKillCursors:
     case kOPCompressed:
     case kOPMsg:
         // Only messages that answer nothing are reported as requests.
         return (response_to == 0) ? kRequest : kUnknown;
     default:
         return kUnknown;
     }
 }

 // TODO(yzhao): This is for initial development use. Later we need to combine with more inference
 // code, as the startup message only appears at the beginning of the exchanges between PostgreSQL
 // client and server.
 //
 // Recognises a PostgreSQL startup message: a big-endian int32 length in [12, 10240], the
 // protocol version bytes 00 03 00 00, then three bytes that loosely look alphabetic.
 // Returns kRequest on a match and kUnknown otherwise.
 static __inline enum message_type_t infer_pgsql_startup_message(const char* buf, size_t count) {
     // Length field: int32, protocol version field: int32, "user" string, 4 bytes.
     enum { kMinMsgLen = 4 + 4 + 4 };
     // Assume a startup message won't be larger than 10240 (10KiB).
     enum { kMaxMsgLen = 10240 };

     if (count < kMinMsgLen) {
         return kUnknown;
     }

     const int32_t msg_len = read_big_endian_int32(buf);
     if (msg_len < kMinMsgLen || msg_len > kMaxMsgLen) {
         return kUnknown;
     }

     // Protocol version 3.0 is encoded as the bytes 00 03 00 00 right after the length.
     if (buf[4] != '\x00' || buf[5] != '\x03' || buf[6] != '\x00' || buf[7] != '\x00') {
         return kUnknown;
     }

     // Next we expect a key like "user", "datestyle" or "extra_float_digits".
     // For inference purposes, simply look for a short sequence of alphabetic characters.
     // The check is deliberately loose (it also admits some non-alphabetic characters such
     // as `\`) to keep the BPF instruction count low.
     int pos = 8;
     while (pos < 8 + 3) {
         if (buf[pos] < 'A') {
             return kUnknown;
         }
         pos++;
     }

     return kRequest;
 }

 // Regular message format: | byte tag | int32_t len | string payload |
 static __inline enum message_type_t infer_pgsql_query_message(const char* buf, size_t count) {
 const uint8_t kTagQ = 'Q';
 if (*buf != kTagQ) {
 return kUnknown;
 }
 const int32_t len = read_big_endian_int32(buf + 1);
 // The length field include the field itself of 4 bytes. Also the minimal size command is
 // COPY/MOVE. The minimal length is therefore 8.
 const int32_t kMinPayloadLen = 8;
 // Assume typical query message size is below an artificial limit.
 // 30000 is copied from postgres code base:
 // https://github.com/postgres/postgres/tree/master/src/interfaces/libpq/fe-protocol3.c#L94
 const int32_t kMaxPayloadLen = 30000;
 if (len < kMinPayloadLen || len > kMaxPayloadLen) {
 return kUnknown;
 }
 // If the input includes a whole message (1 byte tag + length), check the last character.
 if (count > MAX_SOCKET_BUFFER_READ_LENGTH) {
     count = MAX_SOCKET_BUFFER_READ_LENGTH;
 }
 if ((len + 1 <= (int)count) && (buf[count-1] != '\0')) {
 return kUnknown;
 }
 return kRequest;
 }

 // TODO(yzhao): ReadyForQuery message could be nice pattern to check, as it has 6 bytes of fixed bit
 // pattern, plus one byte of enum with possible values 'I', 'E', 'T'.  But it's usually sent as a
 // suffix of a query response, so it's difficult to capture. Research more to see if we can detect
 // this message.

 // Checks that the input is long enough to hold a regular-message header
 // (1-byte tag + int32 length) and then delegates to infer_pgsql_query_message.
 static __inline enum message_type_t infer_pgsql_regular_message(const char* buf, size_t count) {
     if (count < 1 + sizeof(int32_t)) {
         return kUnknown;
     }
     return infer_pgsql_query_message(buf, count);
 }

 static __inline enum message_type_t infer_pgsql_message(const char* buf, size_t count) {
 enum message_type_t type = infer_pgsql_startup_message(buf, count);
 if (type != kUnknown) {
 return type;
 }
 return infer_pgsql_regular_message(buf, count);
 }

 // MySQL packet:
 //      0         8        16        24        32
 //      +---------+---------+---------+---------+
 //      |        payload_length       | seq_id  |
 //      +---------+---------+---------+---------+
 //      |                                       |
 //      .            ...  body ...              .
 //      .                                       .
 //      .                                       .
 //      +----------------------------------------
 // TODO(oazizi/yzhao): This produces too many false positives. Add stronger protocol detection.
 //
 // Infers a MySQL request. `conn_info->prev_count` / `conn_info->prev_buf` describe the
 // previous read on the connection; when that read was exactly 4 bytes and encodes the size
 // of the current buffer, it is treated as the packet header of the current body.
 // Returns kRequest for a recognised command and kUnknown otherwise.
 static __inline enum message_type_t infer_mysql_message(const char* buf, size_t count,
                                                     struct active_connection_t* conn_info) {
 static const uint8_t kComQuery = 0x03;
 static const uint8_t kComConnect = 0x0b;
 static const uint8_t kComStmtPrepare = 0x16;
 static const uint8_t kComStmtExecute = 0x17;
 static const uint8_t kComStmtClose = 0x19;

 // Second statement checks whether suspected header matches the length of current packet.
 // The MySQL header is little endian, so the saved bytes are compared as a native 32-bit
 // load (the same way `len` is extracted below) rather than through a big-endian read;
 // the big-endian interpretation is what the Kafka inference uses for its own header.
 bool use_prev_buf = (conn_info->prev_count == 4) && (*((uint32_t*)conn_info->prev_buf) == count);

 if (use_prev_buf) {
 // MySQL server tends to read in the 4 byte header and the rest of the packet in a
 // separate read, so account for the header that was already consumed.
 count += 4;
 }

 // MySQL packets start with a 3-byte packet length and a 1-byte packet number.
 // The 5th byte on a request contains a command that tells the type.
 if (count < 5) {
 return kUnknown;
 }

 // Convert 3-byte length to uint32_t. But since the 4th byte is supposed to be \x00, directly
 // casting 4-bytes is correct.
 // NOLINTNEXTLINE: readability/casting
 uint32_t len = use_prev_buf ? *((uint32_t*)conn_info->prev_buf) : *((uint32_t*)buf);
 len = len & 0x00ffffff;

 // When the header came from the previous read, the command byte is the first byte of `buf`.
 uint8_t seq = use_prev_buf ? conn_info->prev_buf[3] : buf[3];
 uint8_t com = use_prev_buf ? buf[0] : buf[4];

 // The packet number of a request should always be 0.
 if (seq != 0) {
 return kUnknown;
 }

 // No such thing as a zero-length request in MySQL protocol.
 if (len == 0) {
 return kUnknown;
 }

 // Assuming that the length of a request is less than 10k characters to avoid false
 // positive flagging as MySQL, which statistically happens frequently for a single-byte
 // check.
 if (len > 10000) {
 return kUnknown;
 }

 // TODO(oazizi): Consider adding more commands (0x00 to 0x1f).
 // Be careful, though: trade-off is higher rates of false positives.
 if (com == kComConnect || com == kComQuery || com == kComStmtPrepare || com == kComStmtExecute ||
   com == kComStmtClose) {
 return kRequest;
 }
 return kUnknown;
 }

 // Reference: https://kafka.apache.org/protocol.html#protocol_messages
 // Request Header v0 => request_api_key request_api_version correlation_id
 //     request_api_key => INT16
 //     request_api_version => INT16
 //     correlation_id => INT32
 //
 // Validates the three header fields found at `buf` (which points just past the 4-byte
 // length prefix). Returns kRequest when all are in range and kUnknown otherwise.
 static __inline enum message_type_t infer_kafka_request(const char* buf) {
     // API is Kafka's terminology for opcode.
     enum { kNumAPIs = 62, kMaxAPIVersion = 12 };

     const int16_t api_key = read_big_endian_int16(buf);
     if (!(api_key >= 0 && api_key <= kNumAPIs)) {
         return kUnknown;
     }

     const int16_t api_version = read_big_endian_int16(buf + 2);
     if (!(api_version >= 0 && api_version <= kMaxAPIVersion)) {
         return kUnknown;
     }

     const int32_t correlation = read_big_endian_int32(buf + 4);
     return (correlation < 0) ? kUnknown : kRequest;
 }

 static __inline enum message_type_t infer_kafka_message(const char* buf, size_t count,
                                                     struct active_connection_t* conn_info) {
 // Second statement checks whether suspected header matches the length of current packet.
 // This shouldn't confuse with MySQL because MySQL uses little endian, and Kafka uses big endian.
 bool use_prev_buf =
   (conn_info->prev_count == 4) && ((size_t)read_big_endian_int32(conn_info->prev_buf) == count);

 if (use_prev_buf) {
 count += 4;
 }

 // length(4 bytes) + api_key(2 bytes) + api_version(2 bytes) + correlation_id(4 bytes)
 static const int kMinRequestLength = 12;
 if (count < kMinRequestLength) {
 return kUnknown;
 }

 const int32_t message_size = use_prev_buf ? count : read_big_endian_int32(buf) + 4;

 // Enforcing count to be exactly message_size + 4 to mitigate misclassification.
 // However, this will miss long messages broken into multiple reads.
 if (message_size < 0 || count != (size_t)message_size) {
 return kUnknown;
 }
 const char* request_buf = use_prev_buf ? buf : buf + 4;
 enum message_type_t result = infer_kafka_request(request_buf);

 // Kafka servers read in a 4-byte packet length header first. The first packet in the
 // stream is used to infer protocol, but the header has already been read. One solution is to
 // add another perf_submit of the 4-byte header, but this would impact the instruction limit.
 // Not handling this case causes potential confusion in the parsers. Instead, we set a
 // prepend_length_header field if and only if Kafka has just been inferred for the first time
 // under the scenario described above. Length header is appended to user the buffer in user space.
 if (use_prev_buf && result == kRequest && conn_info->protocol == kProtocolUnknown) {
 conn_info->prepend_length_header = true;
 }
 return result;
 }

 static __inline enum message_type_t infer_dns_message(const char* buf, size_t count) {
 const int kDNSHeaderSize = 12;

 // Use the maximum *guaranteed* UDP packet size as the max DNS message size.
 // UDP packets can be larger, but this is the typical maximum size for DNS.
 const int kMaxDNSMessageSize = 512;

 // Maximum number of resource records.
 // https://stackoverflow.com/questions/6794926/how-many-a-records-can-fit-in-a-single-dns-response
 const int kMaxNumRR = 25;

 if (count < kDNSHeaderSize || count > kMaxDNSMessageSize) {
 return kUnknown;
 }

 const uint8_t* ubuf = (const uint8_t*)buf;

 uint16_t flags = (ubuf[2] << 8) + ubuf[3];
 uint16_t num_questions = (ubuf[4] << 8) + ubuf[5];
 uint16_t num_answers = (ubuf[6] << 8) + ubuf[7];
 uint16_t num_auth = (ubuf[8] << 8) + ubuf[9];
 uint16_t num_addl = (ubuf[10] << 8) + ubuf[11];

 bool qr = (flags >> 15) & 0x1;
 uint8_t opcode = (flags >> 11) & 0xf;
 uint8_t zero = (flags >> 6) & 0x1;

 if (zero != 0) {
 return kUnknown;
 }

 if (opcode != 0) {
 return kUnknown;
 }

 if (num_questions == 0 || num_questions > 10) {
 return kUnknown;
 }

 uint32_t num_rr = num_questions + num_answers + num_auth + num_addl;
 if (num_rr > kMaxNumRR) {
 return kUnknown;
 }

 return (qr == 0) ? kRequest : kResponse;
 }

 // Redis request and response messages share the same format.
 // See https://redis.io/topics/protocol for the REDIS protocol spec.
 //
 // TODO(yzhao): Apply simplified parsing to read the content to distinguished request & response.
 //
 // Returns true when `buf` starts with one of the Redis type markers and its last two
 // bytes (at count - 2 and count - 1) are "\r\n"; false otherwise or when count < 3.
 static __inline bool is_redis_message(const char* buf, size_t count) {
     // A type marker plus the \r\n terminator need at least three bytes.
     if (count < 3) {
         return false;
     }

     switch (buf[0]) {
     case '+':  // simple string
     case '-':  // error
     case ':':  // integer
     case '$':  // bulk string
     case '*':  // array
         break;
     default:
         return false;
     }

     // All Redis messages end with the \r\n terminal sequence.
     return buf[count - 2] == '\r' && buf[count - 1] == '\n';
 }

 // TODO(ddelnano): Mux protocol traffic is currently misidentified as ssh. Since
 // stirling doesn't have ssh support yet, but will need to be addressed. In addition,
 // mux seems to send the header and body on its protocol in two separate syscalls on
 // the server side.
 static __inline enum message_type_t infer_mux_message(const char* buf, size_t count) {
 // mux's on the wire format causes false positives for protocol inference
 // In order to address this, we only infer mux messages by the
 // most useful message types and if they are easy to identify
 static const int8_t kTdispatch = 2;
 static const int8_t kRdispatch = -2;
 static const int8_t kTinit = 68;
 static const int8_t kRinit = -68;
 static const int8_t kRerr = -128;
 static const int8_t kRerrOld = 127;
 uint32_t mux_header_size = 8;
 // TODO(ddelnano): Determine why mux-framer text in T/Rinit is
 // 6 bytes after the mux header
 int32_t mux_framer_pos = mux_header_size + 6;

 if (count < mux_header_size) {
 return kUnknown;
 }

 uint32_t length = read_big_endian_int32(buf) + 4;
 enum message_type_t msg_type;

 int32_t type_and_tag = read_big_endian_int32(buf + 4);
 int8_t mux_type = (type_and_tag & 0xff000000) >> 24;
 uint32_t tag = (type_and_tag & 0xffffff);
 switch (mux_type) {
 case kTdispatch:
 case kTinit:
 case kRerrOld:
   msg_type = kRequest;
   break;
 case kRdispatch:
 case kRinit:
 case kRerr:
   msg_type = kResponse;
   break;
 default:
   return kUnknown;
 }
 if (mux_type == kRerr || mux_type == kRerrOld) {
 if (length > MAX_SOCKET_BUFFER_READ_LENGTH) {
     length = MAX_SOCKET_BUFFER_READ_LENGTH;
 }
 if (buf[length - 5] != 'c' || buf[length - 4] != 'h' || buf[length - 3] != 'e' ||
     buf[length - 2] != 'c' || buf[length - 1] != 'k')
   return kUnknown;
 }

 if (mux_type == kRinit || mux_type == kTinit) {
 if (buf[mux_framer_pos] != 'm' || buf[mux_framer_pos + 1] != 'u' ||
     buf[mux_framer_pos + 2] != 'x' || buf[mux_framer_pos + 3] != '-' ||
     buf[mux_framer_pos + 4] != 'f' || buf[mux_framer_pos + 5] != 'r' ||
     buf[mux_framer_pos + 6] != 'a' || buf[mux_framer_pos + 7] != 'm' ||
     buf[mux_framer_pos + 8] != 'e' || buf[mux_framer_pos + 9] != 'r')
   return kUnknown;
 }

 if (tag < 1 || tag > ((1 << 23) - 1)) {
 return kUnknown;
 }

 return msg_type;
 }

 // NATS messages are in texts. The role is inferred from the message type.
 // See https://github.com/nats-io/docs/blob/master/nats_protocol/nats-protocol.md
 //
 // In case of bpf instruction count limit becomes a problem, we can drop CONNECT and INFO message
 // detection, they are only sent once after establishing the connection.
 //
 // Returns kRequest for client-sent verbs (CONNECT, SUB, UNSUB, PUB), kResponse for
 // server-sent verbs (INFO, MSG, +OK, -ERR) and kUnknown otherwise. Here kRequest / kResponse
 // only indicate which side sent the message.
 static __inline enum message_type_t infer_nats_message(const char* buf, size_t count) {
     // A verb plus the \r\n terminator need at least three bytes.
     if (count < 3) {
         return kUnknown;
     }
     if (count > MAX_SOCKET_BUFFER_READ_LENGTH) {
         count = MAX_SOCKET_BUFFER_READ_LENGTH;
     }

     // The last two chars are \r\n, the terminal sequence of all NATS messages.
     if (buf[count - 2] != '\r' || buf[count - 1] != '\n') {
         return kUnknown;
     }

     // Every recognised verb starts with a distinct character.
     switch (buf[0]) {
     case 'C':
         if (buf[1] == 'O' && buf[2] == 'N' && buf[3] == 'N' && buf[4] == 'E' &&
             buf[5] == 'C' && buf[6] == 'T') {
             return kRequest;
         }
         break;
     case 'S':
         if (buf[1] == 'U' && buf[2] == 'B') {
             return kRequest;
         }
         break;
     case 'U':
         if (buf[1] == 'N' && buf[2] == 'S' && buf[3] == 'U' && buf[4] == 'B') {
             return kRequest;
         }
         break;
     case 'P':
         if (buf[1] == 'U' && buf[2] == 'B') {
             return kRequest;
         }
         break;
     case 'I':
         if (buf[1] == 'N' && buf[2] == 'F' && buf[3] == 'O') {
             return kResponse;
         }
         break;
     case 'M':
         if (buf[1] == 'S' && buf[2] == 'G') {
             return kResponse;
         }
         break;
     case '+':
         if (buf[1] == 'O' && buf[2] == 'K') {
             return kResponse;
         }
         break;
     case '-':
         if (buf[1] == 'E' && buf[2] == 'R' && buf[3] == 'R') {
             return kResponse;
         }
         break;
     default:
         break;
     }

     // PING & PONG can be sent by both client and server. Don't use them.
     return kUnknown;
 }

 // Runs the enabled protocol inferers over `buf` in a fixed order and stops at the first
 // one that recognises the message.
 //
 // Side effects on `conn_info`:
 //   - prepend_length_header is reset to false (the Kafka inferer may set it again),
 //   - prev_count is set to `count`, and prev_buf to the first 4 bytes when count == 4,
 //   - protocol is overwritten only when a protocol was recognised.
 // Returns the inferred message type (kUnknown when nothing matched).
 static __inline enum message_type_t analyze_protocol(char *buf, __u32 count, struct active_connection_t *conn_info) {
     __u32 protocol = kProtocolUnknown;
     __u32 type = kUnknown;

     // The prepend_length_header controls whether a length header is prepended to the buffer
     // in user space.
     conn_info->prepend_length_header = false;

     // PROTOCOL_LIST: Requires update on new protocols.
     // Each step only runs while the message is still unclassified.
     type = infer_http_message(buf, count);
     if (type != kUnknown) {
         protocol = kProtocolHTTP;
     }
     if (type == kUnknown) {
         type = infer_http2_message(buf, count);
         if (type != kUnknown) {
             protocol = kProtocolHTTP2;
         }
     }
     if (type == kUnknown) {
         type = infer_cql_message(buf, count);
         if (type != kUnknown) {
             protocol = kProtocolCQL;
         }
     }
     if (type == kUnknown) {
         type = infer_mongo_message(buf, count);
         if (type != kUnknown) {
             protocol = kProtocolMongo;
         }
     }
     // Disabled: infer_pgsql_message -> kProtocolPGSQL.
     if (type == kUnknown) {
         type = infer_mysql_message(buf, count, conn_info);
         if (type != kUnknown) {
             protocol = kProtocolMySQL;
         }
     }
     // Disabled: infer_mux_message -> kProtocolMux.
     if (type == kUnknown) {
         type = infer_kafka_message(buf, count, conn_info);
         if (type != kUnknown) {
             protocol = kProtocolKafka;
         }
     }
     if (type == kUnknown) {
         type = infer_dns_message(buf, count);
         if (type != kUnknown) {
             protocol = kProtocolDNS;
         }
     }
     // Disabled: is_redis_message -> kProtocolRedis. For Redis, the message type would be left
     // as kUnknown and inferred later via traffic direction and client/server role.
     // Disabled: infer_nats_message -> kProtocolNATS.

     // Remember this read so the next call can recognise a 4-byte header
     // that was read separately from its body.
     conn_info->prev_count = count;
     if (count == 4) {
         conn_info->prev_buf[0] = buf[0];
         conn_info->prev_buf[1] = buf[1];
         conn_info->prev_buf[2] = buf[2];
         conn_info->prev_buf[3] = buf[3];
     }

     // Keep a previously stored protocol when this message was not recognised.
     if (protocol != kProtocolUnknown) {
         conn_info->protocol = protocol;
     }

     return type;
 }