Simply amazing!
New times:
C++: 24 seconds
Python (original): 60.7 seconds
Python (1st improvement): 48.4 seconds
Python (2nd improvement): 45.5 seconds
C++ code:
Code:
#include <fstream>
#include <string>
#include <unordered_map>
#include <cstdlib>
// Extracts the three tab-separated fields this program needs from one record:
// field 1 -> ip1, field 3 -> ip2, field 6 -> bytes (fields are counted from 0).
//
// Returns true when all three were found and the byte count starts with a
// digit; returns false for blank, truncated or otherwise malformed lines so
// the caller can skip them. ip1/ip2 may have been overwritten even when false
// is returned, so their contents must only be used after a true result.
static bool parse_record(const std::string& line,
                         std::string& ip1,
                         std::string& ip2,
                         unsigned long long& bytes)
{
    const int kFirstIpField = 1;
    const int kSecondIpField = 3;
    const int kBytesField = 6;

    std::size_t begin = 0;
    for (int field = 0; field <= kBytesField; ++field)
    {
        std::size_t end = line.find('\t', begin);
        if (end == std::string::npos)
        {
            // Only the last field we care about may be terminated by the end
            // of the line; anything earlier means the record is too short.
            if (field != kBytesField)
                return false;
            end = line.size();
        }

        if (field == kFirstIpField)
        {
            ip1.assign(line, begin, end - begin);
        }
        else if (field == kSecondIpField)
        {
            ip2.assign(line, begin, end - begin);
        }
        else if (field == kBytesField)
        {
            // Require a leading digit: strtoull would otherwise skip leading
            // whitespace (including the next tab) and read a later field.
            if (begin == end || line[begin] < '0' || line[begin] > '9')
                return false;
            // Parse in place; strtoull stops at the terminating tab or NUL,
            // so no temporary substring is needed.
            bytes = std::strtoull(line.c_str() + begin, nullptr, 10);
            return true;
        }

        begin = end + 1;
    }
    return false;  // Not reached: the loop always returns on kBytesField.
}

// Reads "20110803.txt" line by line and accumulates, per IP string, the sum
// of the byte counts of every record in which that IP appears in field 1 or
// field 3. Returns EXIT_FAILURE if the file cannot be opened, 0 otherwise.
int main()
{
    std::ifstream infile("20110803.txt");
    if (!infile)
        return EXIT_FAILURE;

    // unsigned long long (at least 64 bits) instead of unsigned int, which is
    // commonly 32 bits and would wrap once one IP's total passes 4 GiB.
    std::unordered_map<std::string, unsigned long long> bandwidth;

    // Declared outside the loop so the strings reuse their capacity.
    std::string line, ip1, ip2;
    unsigned long long bytes = 0;

    while (std::getline(infile, line))
    {
        // Skip malformed lines rather than crediting bytes to a bogus key.
        if (!parse_record(line, ip1, ip2, bytes))
            continue;
        bandwidth[ip1] += bytes;
        bandwidth[ip2] += bytes;
    }

    // infile is closed by its destructor.
    return 0;
}
Python code (the original version followed by the two improved versions, each timed separately):
Code:
import time

# Baseline measurement: pull the file in one line at a time with readline()
# and use a membership test to choose between updating an existing per-IP
# total and creating a new one. Prints the elapsed wall-clock time.
t1 = time.time()
bandwidth = {}
# `with` guarantees the file is closed even if a row fails to parse.
with open('20110803.txt', 'r') as fin:
    line = fin.readline()
    # readline() returns '' only at end of file (a blank line is '\n').
    while line != '':
        row = line.split('\t')
        # Named nbytes so the built-in `bytes` type is not shadowed.
        nbytes = int(row[6])
        if row[1] in bandwidth:
            bandwidth[row[1]] += nbytes
        else:
            bandwidth[row[1]] = nbytes
        if row[3] in bandwidth:
            bandwidth[row[3]] += nbytes
        else:
            bandwidth[row[3]] = nbytes
        line = fin.readline()
dt = time.time() - t1
print(('%.1f' % dt) + ' seconds\n')
# Timed variant that iterates over the file object directly instead of
# calling readline() by hand; per-IP totals are still updated via a
# membership test. Prints the elapsed wall-clock time.
t1 = time.time()
bandwidth = {}
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('20110803.txt', 'r') as fin:
    for line in fin:
        row = line.split('\t')
        # Named nbytes so the built-in `bytes` type is not shadowed.
        nbytes = int(row[6])
        if row[1] in bandwidth:
            bandwidth[row[1]] += nbytes
        else:
            bandwidth[row[1]] = nbytes
        if row[3] in bandwidth:
            bandwidth[row[3]] += nbytes
        else:
            bandwidth[row[3]] = nbytes
dt = time.time() - t1
print(('%.1f' % dt) + ' seconds\n')
# Timed variant that iterates over the file object and updates each per-IP
# total optimistically, falling back to an insert when the key is missing
# (KeyError) instead of testing membership first. Prints the elapsed
# wall-clock time.
t1 = time.time()
bandwidth = {}
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('20110803.txt', 'r') as fin:
    for line in fin:
        row = line.split('\t')
        # Named nbytes so the built-in `bytes` type is not shadowed.
        nbytes = int(row[6])
        try:
            bandwidth[row[1]] += nbytes
        except KeyError:
            bandwidth[row[1]] = nbytes
        try:
            bandwidth[row[3]] += nbytes
        except KeyError:
            bandwidth[row[3]] = nbytes
dt = time.time() - t1
print(('%.1f' % dt) + ' seconds\n')
Example text:
Code:
25201 72.247.133.186 443 192.168.3.62 52314 SSL 1514 Continuation Data
25201 72.247.133.186 443 192.168.3.62 52314 SSL 1514 Continuation Data
25201 72.247.133.186 443 192.168.3.62 52314 SSL 1514 Continuation Data
25201 192.168.5.7 54164 192.168.3.181 445 SMB 124 Write AndX Request, FID: 0xc010, 2 bytes at offset 1425610
25201 192.168.3.7 445 192.168.4.7 49495 SMB 105 Write AndX Response, 2 bytes
25201 72.247.133.186 443 192.168.3.62 52314 SSL 1514 Continuation Data
25201 72.247.133.186 443 192.168.3.62 52314 SSL 507 Continuation Data
25201 192.168.3.7 445 192.168.4.7 49495 SMB 105 Write AndX Response, 86 bytes
25201 192.168.62.2 65200 192.168.3.124 4089 DCERPC 106 Response: call_id: 3153182 Fragment: Single ctx_id: 1
25201 192.168.3.7 445 192.168.4.7 49495 SMB 105 Write AndX Response, 40 bytes
25202 192.168.9.7 2322 192.168.3.7 445 SMB 124 Write AndX Request, FID: 0x000f, 2 bytes at offset 906509