#!/usr/bin/env python3
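"""Generate RPC statistics from a network traffic capture of a cluster."""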

import argparse
import collections
import dpkt
import json
import operator
import os
import re
import socket
import struct
import subprocess
import sys
import tabulate

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
PROJECT_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, ".."))

TYPE_INFO_PATTERN = re.compile(r"""
    const\s+utils::TypeInfo\s+
    (\S+)\s*             # <class-name>::kType
    {\s*
    (\S+)ULL\s*,\s*      # <id-hex>
    "(\S+)"\s*           # "<class-name>"
    }
    """, re.VERBOSE)


# helpers


def format_endpoint(addr, port):
    """Render a raw IPv4 address (4 packed bytes) and a port as 'a.b.c.d:port'."""
    return "{}:{}".format(socket.inet_ntoa(addr), port)


def parse_source_file(fname):
    """Map message type ids to class names found in one generated source."""
    ret = {}
    with open(fname) as f:
        for match in TYPE_INFO_PATTERN.finditer(f.read()):
            id_hex = int(match.groups()[1], 16)
            class_name = match.groups()[2]
            ret[id_hex] = class_name
    return ret


def parse_all_source_files(dirname):
    """Collect message type ids from all *.lcp.cpp files under `dirname`."""
    ids = {}
    ret = subprocess.run(["find", dirname, "-name", "*.lcp.cpp"],
                         stdout=subprocess.PIPE)
    ret.check_returncode()
    sources = list(filter(None, ret.stdout.decode("utf-8").split("\n")))
    for source in sources:
        ids.update(parse_source_file(source))
    return ids


MESSAGES = parse_all_source_files(os.path.join(PROJECT_DIR, "src"))


class Connection:
    # uint32_t message_size
    SIZE_FORMAT = "I"
    SIZE_LEN = struct.calcsize(SIZE_FORMAT)

    def __init__(self):
        # Previously completed buffer, kept to detect TCP retransmissions.
        self._previous = bytes()
        # Bytes accumulated for the message currently being reassembled.
        self._data = bytes()
        self._message = bytes()
        # Packet timestamps belonging to the current message.
        self._ts = []
        # Pending request (type, timestamp, size) awaiting its response.
        self._last = None
        self._stats = collections.defaultdict(lambda: {"duration": [],
                                                       "size": []})
        self._requests = []

    def _extract_message(self):
        if len(self._data) < self.SIZE_LEN:
            return False
        msg_len = struct.unpack_from(self.SIZE_FORMAT, self._data)[0]
        if len(self._data) < self.SIZE_LEN + msg_len:
            return False
        # Assumes the buffer contains exactly one message: everything after
        # the size prefix is taken as the payload.
        self._message = self._data[self.SIZE_LEN:]
        self._previous = self._data
        self._data = bytes()
        return True
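
    # Framing sketch: a buffer such as
    #   struct.pack("I", 5) + b"hello"
    # is one complete 5-byte message; until all 5 payload bytes have
    # arrived, _extract_message() keeps returning False.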

    def add_data(self, data, direction, ts):
        # A payload identical to the tail of the previous or current buffer
        # is treated as a TCP retransmission and dropped.
        if self._previous[-len(data):] == data \
                or self._data[-len(data):] == data:
            print("Retransmission detected!", file=sys.stderr)
            return

        self._data += data
        self._ts.append(ts)

        if not self._extract_message():
            return

        # Scan a few 8-byte words near the start of the message for a known
        # message type id.
        found = False
        for i in range(2, 6):
            if len(self._message) < (i + 1) * 8:
                continue
            message_id = struct.unpack("<Q",
                                       self._message[i * 8:(i + 1) * 8])[0]
            if message_id in MESSAGES:
                found = True
                break

        if not found:
            print("Got a message that I can't identify as any known "
                  "RPC request/response!", file=sys.stderr)
            self._last = None
            self._ts = []
            return

        message_type = MESSAGES[message_id]

        # Requests get the timestamp of their last packet (fully received);
        # responses get the timestamp of their first packet (start of reply).
        if direction == "to":
            self._requests.append((self._ts[-1], direction, message_type,
                                   len(self._message)))
        else:
            self._requests.append((self._ts[0], direction, message_type,
                                   len(self._message)))

        # Pair each request with the message that follows it to produce one
        # (duration, combined size) sample for that RPC.
        if self._last is None:
            self._last = (message_type, self._ts[0], len(self._message))
        else:
            req_type, req_ts, req_size = self._last
            duration = self._ts[-1] - req_ts
            self._stats[(req_type, message_type)]["duration"].append(duration)
            self._stats[(req_type, message_type)]["size"].append(
                req_size + len(self._message))
            self._last = None

        self._ts = []
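
    # E.g. a request first seen at t=1.0 whose response completes at t=1.2
    # yields one sample: duration 0.2 s, size = len(request) + len(response).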

    def get_stats(self):
        return self._stats

    def get_requests(self):
        return self._requests


class Server:
    def __init__(self):
        self._conns = collections.defaultdict(Connection)

    def add_data(self, addr, data, direction, ts):
        self._conns[addr].add_data(data, direction, ts)

    def print_stats(self, machine_names, title, sort_by):
        # Aggregate per-connection stats by client IP.
        stats = collections.defaultdict(lambda: collections.defaultdict(
            lambda: {"duration": [], "size": []}))

        for addr, conn in self._conns.items():
            ip, _ = addr.split(":")
            for rpc, connstats in conn.get_stats().items():
                stats[ip][rpc]["duration"] += connstats["duration"]
                stats[ip][rpc]["size"] += connstats["size"]

        table = []
        headers = ["RPC ({})".format(title), "Client", "Count", "Tmin (ms)",
                   "Tavg (ms)", "Tmax (ms)", "Ttot (s)", "Smin (B)",
                   "Savg (B)", "Smax (B)", "Stot (kiB)"]
        sort_keys = ["rpc", "client", "count", "tmin", "tavg", "tmax", "ttot",
                     "smin", "savg", "smax", "stot"]
        for client in sorted(stats.keys()):
            rpcs = stats[client]
            for rpc, connstats in rpcs.items():
                durs = connstats["duration"]
                sizes = connstats["size"]
                durs_sum = sum(durs)
                sizes_sum = sum(sizes)
                table.append(["{} / {}".format(*rpc), machine_names[client],
                              len(durs), min(durs) * 1000,
                              durs_sum / len(durs) * 1000,
                              max(durs) * 1000, durs_sum, min(sizes),
                              int(sizes_sum / len(sizes)), max(sizes),
                              sizes_sum / 1024])
        # list.sort() is stable, so sorting once per field makes the *last*
        # field in sort_by the most significant key.
        for sort_field in sort_by.split(","):
            reverse = sort_field.endswith("-")
            table.sort(key=operator.itemgetter(sort_keys.index(
                sort_field.rstrip("+-"))), reverse=reverse)
        print(tabulate.tabulate(table, headers=headers, tablefmt="psql",
                                floatfmt=".3f"))

    def get_requests(self, server_name, machine_names):
        ret = []
        for addr, conn in self._conns.items():
            client_name = machine_names[addr.split(":")[0]]
            for ts, direction, message, size in conn.get_requests():
                if direction == "from":
                    name_from, name_to = server_name, client_name
                else:
                    name_from, name_to = client_name, server_name
                ret.append((ts, name_from, name_to, message, size))
        return ret


# process logic

parser = argparse.ArgumentParser(description="Generate RPC statistics from "
                                 "a network traffic capture.")
parser.add_argument("--sort-by", default="tavg+,count-,client+",
                    help="comma-separated list of fields which should be used "
                    "to sort the data; each field can be suffixed with + or - "
                    "to indicate ascending or descending order; available "
                    "fields: rpc, client, count, tmin, tavg, tmax, ttot, "
                    "smin, savg, smax, stot")
parser.add_argument("--no-aggregate", action="store_true",
                    help="don't aggregate the results, instead display the "
                    "individual RPC calls as they occurred")
parser.add_argument("capfile", help="network traffic capture file")
parser.add_argument("conffile", help="cluster config file")
args = parser.parse_args()
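
# Example invocation (script and file names hypothetical):
#
#   ./rpc_stats.py --sort-by ttot-,client+ capture.pcap cluster_config.json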

with open(args.conffile) as f:
    config = json.load(f)

last_worker = 0
machine_names = {}
server_addresses = []
for machine in config["workload_machines"]:
    name = machine["type"]
    if name == "worker":
        last_worker += 1
        name += str(last_worker)
    machine_names["{address}".format(**machine)] = name
    server_addresses.append("{address}:{port}".format(**machine))
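
# The config is expected to look roughly like this (values hypothetical):
#
#   {"workload_machines": [
#       {"type": "master", "address": "10.0.0.1", "port": 10000},
#       {"type": "worker", "address": "10.0.0.2", "port": 10000}]}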

servers = collections.defaultdict(Server)

# Replay the capture: every non-empty TCP payload exchanged with one of the
# configured servers is fed into that server's connection tracker.
with open(args.capfile, "rb") as capfile:
    for ts, pkt in dpkt.pcap.Reader(capfile):
        eth = dpkt.ethernet.Ethernet(pkt)
        if eth.type != dpkt.ethernet.ETH_TYPE_IP:
            continue

        ip = eth.data
        if ip.p != dpkt.ip.IP_PROTO_TCP:
            continue

        tcp = ip.data
        src = format_endpoint(ip.src, tcp.sport)
        dst = format_endpoint(ip.dst, tcp.dport)
        if src not in server_addresses and dst not in server_addresses:
            continue
        if len(tcp.data) == 0:
            continue

        server = dst if dst in server_addresses else src
        client = dst if dst not in server_addresses else src
        direction = "to" if dst in server_addresses else "from"

        servers[server].add_data(client, tcp.data, direction, ts)

requests = []
for server in sorted(servers.keys()):
    server_name = machine_names[server.split(":")[0]]
    if args.no_aggregate:
        requests.extend(servers[server].get_requests(server_name,
                                                     machine_names))
    else:
        servers[server].print_stats(machine_names=machine_names,
                                    title=server_name,
                                    sort_by=args.sort_by)

if args.no_aggregate:
    requests.sort()
    headers = ["timestamp", "from", "to", "request", "size"]
    print(tabulate.tabulate(requests, headers=headers, tablefmt="psql",
                            floatfmt=".6f"))