Index

lognestmonster / df63a69

A general-purpose single-header C logging library and parser for event-based logs. (Incomplete)

Latest Commit

{#} | Time | Hash | Subject | Author | # | (+) | (-) | GPG?
92 | 02 Sep 2019 13:20 | df63a69 | update file reading for speed | Josh Stockin | 17650 | N

Blob @ lognestmonster / parser / read.py

application/x-python | 6521 bytes | download raw
1# lognestmonster Copyright (c) 2019 Joshua 'joshuas3' Stockin
2# <https://github.com/JoshuaS3/lognestmonster/>.
3
4
5# This file is part of lognestmonster.
6
7# lognestmonster is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11
12# lognestmonster is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16
17# You should have received a copy of the GNU General Public License
18# along with lognestmonster. If not, see <https://www.gnu.org/licenses/>.
19
20import struct
21import os
22import resource
23import time
24import sys
25
# Single-byte markers that Reader.scan compares each incoming byte against
# to recognise where a statement or an event begins and ends.
STATEMENT_START, STATEMENT_END, EVENT_START, EVENT_END = range(4)
30
# Display names for the numeric verbosity levels, keyed by level number
# (0 is "INIT", 5 is "ERROR").
VERBOSITY_LEVELS = dict(enumerate((
    "INIT",
    "DEBUG",
    "VERBOSE",
    "VERYVERBOSE",
    "WARNING",
    "ERROR",
)))
39
def ulonglong(bytestr):
    """Decode *bytestr* as one native-order unsigned 64-bit integer.

    *bytestr* must be exactly 8 bytes; struct.error is raised otherwise.
    """
    (value,) = struct.unpack("@Q", bytestr)
    return value
def uchar(charv):
    """Decode *charv* as one unsigned 8-bit integer.

    *charv* must be exactly 1 byte; struct.error is raised otherwise.
    """
    (value,) = struct.unpack("@B", charv)
    return value
def ushort(shortv):
    """Decode *shortv* as one native-order unsigned 16-bit integer.

    *shortv* must be exactly 2 bytes; struct.error is raised otherwise.
    """
    (value,) = struct.unpack("@H", shortv)
    return value
46
47
class EventProto:
    """Placeholder for an event discovered while scanning a log file.

    Attributes:
        parent: the enclosing EventProto, or None when the event is not
            nested inside another one.
        pushed: the items collected for this event. The list always starts
            with a single ``False`` entry, so Reader.scan treats an event as
            non-empty only when ``len(pushed) > 1``.
    """

    # Class-level defaults. __init__ rebinds both names on every instance,
    # so constructed instances never share this list.
    parent = None
    pushed = [False]

    def __init__(self):
        self.parent, self.pushed = None, [False]
54
class Reader:
    """Scan a binary log stream and index its statements and events.

    The reader wraps a seekable binary file object. After ``size()`` has
    recorded the file length, ``scan()`` walks the stream from the current
    position and records where each statement starts and how events nest.

    Attributes:
        fd: the seekable binary file object being read.
        version, timestamp: header values read by ``scan()`` when it starts
            at offset 0 (one byte, then eight bytes).
        top_level: items that are not nested in any event; statement offsets
            (ints) and closed EventProto objects, in the order encountered.
        event_count, statement_count: running totals of accepted items.
        file_size: length recorded by the most recent ``size()`` call.
        position: last stream offset recorded by ``pos()`` or ``seek()``.
        bad_bytes: count of bytes that could not be interpreted.
        filter_time_start, filter_time_end, filter_verbosity, filter_tag:
            optional statement filters; ``-1`` means "no filter".
            ``filter_verbosity`` is tested with ``in``, so it must be a
            container of verbosity numbers when set.
    """

    # Class-level defaults mirror the instance attributes assigned in
    # __init__; they are kept so attribute access on the class still works.
    fd = None

    version = 0
    timestamp = 0

    top_level = []

    event_count = 0
    statement_count = 0

    file_size = 0
    position = 0
    bad_bytes = 0

    filter_time_start = -1
    filter_time_end = -1
    filter_verbosity = -1
    filter_tag = -1

    def __init__(self, fd):
        """Create a reader over *fd*, a seekable binary file object."""
        self.fd = fd

        self.version = 0
        self.timestamp = 0

        self.top_level = []

        self.event_count = 0
        self.statement_count = 0

        self.file_size = 0
        self.position = 0
        self.bad_bytes = 0

        self.filter_time_start = -1
        self.filter_time_end = -1
        self.filter_verbosity = -1
        self.filter_tag = -1

    def size(self):
        """Refresh ``file_size`` and report whether it changed.

        The stream is moved to its end to measure the length and then
        returned to ``self.position``.

        Returns:
            True when the measured size differs from the previously
            recorded ``file_size``, False otherwise.
        """
        self.fd.seek(0, os.SEEK_END)  # go to end of file and get position
        newsize = self.fd.tell()
        self.fd.seek(self.position)  # return to previous position

        # Compare by value: identity comparison of ints is only reliable for
        # small cached values and would report a change on every call.
        is_diff = self.file_size != newsize
        self.file_size = newsize
        return is_diff

    def pos(self):
        """Record the stream's current offset in ``position`` and return it."""
        p = self.fd.tell()
        self.position = p
        return p

    def seek(self, position):
        """Move the stream to *position* and record it in ``position``."""
        self.position = position
        self.fd.seek(self.position)

    def read(self, byte_count):
        """Read exactly *byte_count* bytes from the stream.

        Returns:
            The bytes read, or False when fewer than *byte_count* bytes
            were available. ``position`` is not updated by this method.
        """
        data = self.fd.read(byte_count)
        if len(data) == byte_count:
            return data
        return False

    def scan(self):  # scan for events and statements from self.position to the end of file
        """Index statements and events from the current position to EOF.

        ``file_size`` is used as the end-of-file bound, so ``size()`` must
        have been called beforehand. When scanning starts at offset 0 the
        header (1-byte version, 8-byte timestamp) is read first; a header
        that cannot be read in full raises TypeError because ``read()``
        returns False. Progress and totals are printed to stdout.

        Each byte outside a statement is interpreted as a marker:
        STATEMENT_START parses one statement, EVENT_START opens a nested
        event, EVENT_END closes the current event, and anything else is
        counted in ``bad_bytes``.
        """
        print()
        print("beginning file scan")
        if self.filter_time_start != -1:
            print("filter time start: {0}".format(self.filter_time_start))
        if self.filter_time_end != -1:
            print("filter time end: {0}".format(self.filter_time_end))
        if self.filter_verbosity != -1:
            print("filter verbosity: {0}".format(self.filter_verbosity))
        if self.filter_tag != -1:
            print("filter tag: {0}".format(self.filter_tag))
        print("file size: {0}".format(self.file_size))
        print()

        s = time.time()

        if self.position == 0:  # if it's the start of the file, grab version and timestamp
            self.version = uchar(self.read(1))
            self.timestamp = ulonglong(self.read(8))

        current_event = None

        # pos() also stores the marker's offset in self.position, which
        # _scan_statement relies on to compute field offsets.
        while self.pos() < self.file_size:  # while the seeker is before EOF
            in_byte = uchar(self.read(1))  # read 1 byte

            if in_byte == STATEMENT_START:
                self._scan_statement(current_event)
            elif in_byte == EVENT_START:
                # open a new event nested inside the current one (if any)
                new_event = EventProto()
                new_event.parent = current_event
                current_event = new_event
            elif in_byte == EVENT_END:
                current_event = self._close_event(current_event)
            else:  # unknown byte, not in statement or event
                self.bad_bytes += 1

        print("finished reading, {0} bad bytes".format(self.bad_bytes))
        print()
        print("tree version: {0}".format(self.version))
        print("tree timestamp: {0}".format(self.timestamp))
        print()
        print("event count: {0}".format(self.event_count))
        print("statement count: {0}".format(self.statement_count))
        print()
        print("time elapsed: {0}".format(time.time() - s))

    def _scan_statement(self, current_event):
        """Parse the statement whose start marker was just read.

        Layout after the marker, as consumed here: 8-byte timestamp, 1-byte
        verbosity, 1-byte tag size, tag bytes, 2-byte message size, message
        bytes, then the STATEMENT_END marker. The message body is skipped.
        A statement that passes every active filter is recorded by the
        offset of its timestamp, in *current_event* when one is open and in
        ``top_level`` otherwise.
        """
        # self.position holds the marker's offset (set by pos() in scan);
        # the statement's fields begin one byte later.
        this_position = self.position + 1

        try:
            timestamp = ulonglong(self.read(8))

            append = True

            if self.filter_time_start != -1 and append:
                append = timestamp > self.filter_time_start

            if self.filter_time_end != -1 and append:
                append = timestamp < self.filter_time_end

            if self.filter_verbosity != -1 and append:
                verbosity = uchar(self.read(1))
                append = verbosity in self.filter_verbosity
            else:
                self.seek(this_position + 9)  # skip the verbosity byte

            tag_size = uchar(self.read(1))
            if self.filter_tag != -1 and append:
                # NOTE(review): a statement with an empty tag is not rejected
                # by the tag filter - confirm this is intended.
                if tag_size > 0:
                    tag = self.read(tag_size).decode("utf-8")
                    append = tag == self.filter_tag
            else:
                self.seek(this_position + 10 + tag_size)  # skip the tag

            message_size = ushort(self.read(2))
            self.seek(this_position + 12 + tag_size + message_size)  # ignore the message

            # Anything between the message and the end marker is junk.
            while uchar(self.read(1)) != STATEMENT_END and self.pos() < self.file_size:
                self.bad_bytes += 1

            if append:
                self.statement_count += 1
                if current_event is not None:
                    current_event.pushed.append(this_position)
                else:
                    self.top_level.append(this_position)
        except Exception:
            # Deliberately best-effort: a short read makes read() return
            # False, which the decoders reject; treat everything consumed so
            # far as bad bytes and let the scan continue.
            self.bad_bytes += self.pos() - this_position

    def _close_event(self, event):
        """Close *event* and return the event that becomes current.

        A non-empty event is counted and attached to its parent, or to
        ``top_level`` when it has none; an empty event is dropped. When no
        event is open the end marker is out of place and is counted as a
        bad byte.
        """
        if event is None:  # event doesn't exist, this end byte is out of place
            self.bad_bytes += 1
            return None

        parent = event.parent
        if len(event.pushed) > 1:  # more than the initial placeholder entry
            self.event_count += 1
            if parent is not None:
                parent.pushed.append(event)
            else:
                self.top_level.append(event)  # no parent, it's a top-level log item
        return parent
227
228