1 | # lognestmonster Copyright (c) 2019 Joshua 'joshuas3' Stockin |
2 | # <https://github.com/JoshuaS3/lognestmonster/>. |
3 |
|
4 |
|
5 | # This file is part of lognestmonster. |
6 |
|
7 | # lognestmonster is free software: you can redistribute it and/or modify |
8 | # it under the terms of the GNU General Public License as published by |
9 | # the Free Software Foundation, either version 3 of the License, or |
10 | # (at your option) any later version. |
11 |
|
12 | # lognestmonster is distributed in the hope that it will be useful, |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | # GNU General Public License for more details. |
16 |
|
17 | # You should have received a copy of the GNU General Public License |
18 | # along with lognestmonster. If not, see <https://www.gnu.org/licenses/>. |
19 |
|
20 | import struct |
21 | import os |
22 |
|
# Single-byte markers that delimit blocks in the binary log stream.
STATEMENT_START = 0x00  # opens a statement record
STATEMENT_END = 0x01    # closes a statement record
EVENT_START = 0x02      # opens an event (may nest)
EVENT_END = 0x03        # closes the innermost open event
27 |
|
# Display names for numeric verbosity levels, keyed by level number.
# NOTE(review): presumably indexed by the verbosity byte read for each
# statement -- confirm against the callers that render statements.
VERBOSITY_LEVELS = dict(enumerate((
    "INIT",         # 0
    "DEBUG",        # 1
    "VERBOSE",      # 2
    "VERYVERBOSE",  # 3
    "WARNING",      # 4
    "ERROR",        # 5
)))
36 |
|
def ulonglong(bytestr):
    """Decode *bytestr* as one native-order unsigned long long ("@Q")."""
    (value,) = struct.unpack("@Q", bytestr)
    return value
def uchar(charv):
    """Decode the single byte *charv* as an unsigned char ("@B")."""
    (value,) = struct.unpack("@B", charv)
    return value
def ushort(shortv):
    """Decode *shortv* as one native-order unsigned short ("@H")."""
    (value,) = struct.unpack("@H", shortv)
    return value
43 |
|
44 |
|
class EventProto:
    """Lightweight node for one event while the log is being parsed.

    Attributes:
        parent: the enclosing EventProto, or None for a top-level event.
        pushed: list of children. It always starts with a single ``False``
            entry, so an event with no children has ``len(pushed) == 1``.
    """

    # Class-level fallbacks; every instance gets its own values in __init__,
    # so the list below is never the one an instance appends to.
    pushed = [False]
    parent = None

    def __init__(self):
        self.pushed = [False]
        self.parent = None
51 |
|
class Reader:
    """Incremental parser for a lognestmonster binary log stream.

    Layout as consumed by this class:

    * header: 1 version byte, then an 8-byte unsigned timestamp;
    * statement: ``STATEMENT_START``, 8-byte timestamp, 1 verbosity byte,
      1 tag-length byte, the tag, a 2-byte message length, the message,
      ``STATEMENT_END``;
    * event: ``EVENT_START`` ... ``EVENT_END`` around statements and nested
      events.

    Parsed items are collected in ``top_level`` (or in the ``pushed`` list of
    the event that is currently open). Events are stored as ``EventProto``
    objects. In seekable mode a statement is stored as the stream offset just
    past its start byte (resolve it with ``fetch_item``); in non-seekable
    mode it is stored as a ``(timestamp, verbosity, tag, message)`` tuple of
    ``(int, int, str, str)``.

    Counters:
        statement_count / total_statements: statements that passed the
            filters / all statements parsed completely.
        event_count / total_events: closed events that had at least one
            child / all closed events.
        bad_bytes: bytes that could not be interpreted.

    Filters are only applied when ``filters`` is true; each individual
    ``filter_*`` attribute is disabled while it holds ``-1``.
    ``filter_verbosity`` must be a container of verbosity numbers,
    ``filter_tag`` a string, and the time filters are exclusive bounds.
    """

    # Class-level defaults. __init__ rebinds every one of them per instance,
    # so the mutable lists here are never shared between readers.
    fd = None

    top_level = []

    current_event = None

    event_count = 0
    statement_count = 0

    total_events = 0
    total_statements = 0

    seekable = True
    file_size = 0
    position = 0

    version = 0
    timestamp = 0

    bad_bytes = 0

    filters = False

    filter_time_start = -1
    filter_time_end = -1
    filter_verbosity = -1
    filter_tag = -1

    update_callbacks = []

    def __init__(self, fd, seekable=True):
        """Wrap the binary stream *fd*.

        Args:
            fd: binary file-like object providing ``read`` (and ``seek`` /
                ``tell`` when *seekable* is true).
            seekable: whether statements may be stored as offsets and
                re-read later instead of being decoded immediately.
        """
        self.fd = fd

        self.top_level = []

        self.current_event = None

        self.event_count = 0
        self.statement_count = 0

        self.total_events = 0
        self.total_statements = 0

        self.seekable = seekable
        self.file_size = 0
        self.position = 0

        self.version = 0
        self.timestamp = 0

        self.bad_bytes = 0

        self.filters = False

        self.filter_time_start = -1
        self.filter_time_end = -1
        self.filter_verbosity = -1
        self.filter_tag = -1

        self.update_callbacks = []

    def update(self):
        """Invoke every registered update callback, in registration order."""
        for callback in self.update_callbacks:
            callback()

    def onupdate(self, callback):
        """Register a zero-argument *callback* to be run by ``update``."""
        self.update_callbacks.append(callback)

    def size(self):
        """Re-measure the stream and report whether its size changed.

        Seeks to the end to learn the size, then returns to ``position``.

        Returns:
            True if the measured size differs from the previous
            ``file_size``, False otherwise. ``file_size`` is updated either
            way.
        """
        self.fd.seek(0, os.SEEK_END)  # go to end of file and get position
        newsize = self.fd.tell()
        self.fd.seek(self.position)  # return to previous position

        # Compare by value: identity (`is not`) is only reliable for the
        # small integers CPython happens to cache.
        is_diff = self.file_size != newsize
        self.file_size = newsize
        return is_diff

    def pos(self):
        """Return the stream's current offset and cache it in ``position``."""
        p = self.fd.tell()
        self.position = p
        return p

    def seek(self, position):
        """Move the stream to the absolute offset *position*."""
        self.position = position
        self.fd.seek(self.position)

    def read(self, byte_count):
        """Read up to *byte_count* bytes from the stream."""
        return self.fd.read(byte_count)

    def fetch_item(self, position):
        """Decode the statement whose body starts at offset *position*.

        *position* is the offset stored by ``parse_block`` in seekable mode,
        i.e. the byte right after the statement's start marker. The stream
        position is restored afterwards, even if decoding fails.

        Returns:
            ``(timestamp, verbosity, tag, message)`` with the tag and message
            decoded as UTF-8.

        Raises:
            struct.error: if the stream ends before a fixed-size field.
            UnicodeDecodeError: if the tag or message is not valid UTF-8.
        """
        previouspos = self.pos()
        self.seek(position)
        try:
            timestamp = ulonglong(self.read(8))
            verbosity = uchar(self.read(1))
            tag_size = uchar(self.read(1))
            tag = self.read(tag_size).decode("utf-8")
            message_size = ushort(self.read(2))
            message = self.read(message_size).decode("utf-8")
        finally:
            self.seek(previouspos)
        return (timestamp, verbosity, tag, message)

    def parse_block(self, in_byte):
        """Interpret the block introduced by the marker byte *in_byte*.

        Returns:
            -1 when the byte is unknown, when an event end arrives with no
            open event, or when a statement is truncated or malformed;
            None otherwise.
        """
        if in_byte == STATEMENT_START:
            return self._parse_statement()
        if in_byte == EVENT_START:
            self._open_event()
            return None
        if in_byte == EVENT_END:
            return self._close_event()

        self.bad_bytes += 1  # unknown byte
        return -1

    def _passes_filters(self, timestamp, verbosity, tag):
        """Return True if a statement with these raw fields should be kept.

        *tag* is the undecoded tag; it is only decoded when the tag filter
        is actually evaluated.
        """
        if not self.filters:
            return True
        if self.filter_time_start != -1 and not timestamp > self.filter_time_start:
            return False
        if self.filter_time_end != -1 and not timestamp < self.filter_time_end:
            return False
        if self.filter_verbosity != -1 and verbosity not in self.filter_verbosity:
            return False
        if self.filter_tag != -1 and tag.decode("utf-8") != self.filter_tag:
            return False
        return True

    def _store(self, item):
        """Append *item* to the open event, or to ``top_level`` if none."""
        if self.current_event is not None:
            self.current_event.pushed.append(item)
        else:
            self.top_level.append(item)

    def _parse_statement(self):
        """Parse one statement body; the start marker was already consumed."""
        seekable = self.seekable
        # Offset just past the start marker; fetch_item() re-reads from here.
        this_position = self.pos() if seekable else None

        try:
            block = self.read(10)
            timestamp = ulonglong(block[:8])  # bytes 1-8
            verbosity = block[8]  # byte 9
            tag = self.read(block[9])  # byte 10 holds the tag length

            append = self._passes_filters(timestamp, verbosity, tag)

            message_size = ushort(self.read(2))
            if seekable:
                self.read(message_size)  # skip; decoded on demand later
                # Anything between the message and the end marker is junk.
                while (self.read(1)[0] != STATEMENT_END
                       and self.pos() < self.file_size):
                    self.bad_bytes += 1
            else:
                message = self.read(message_size).decode("utf-8")
                while self.read(1)[0] != STATEMENT_END:
                    self.bad_bytes += 1

            if append:
                if seekable:
                    item = this_position
                else:
                    # Always store the tag as text, like fetch_item() does.
                    item = (timestamp, verbosity, tag.decode("utf-8"), message)
                self.statement_count += 1
                self._store(item)
            self.total_statements += 1
        except (struct.error, IndexError, UnicodeDecodeError):
            # Truncated stream (short read / empty read) or undecodable text.
            return -1
        return None

    def _open_event(self):
        """Start a new event nested inside the currently open one, if any."""
        new_event = EventProto()
        if self.current_event is not None:
            new_event.parent = self.current_event
        self.current_event = new_event

    def _close_event(self):
        """Close the innermost open event and attach it if it has children."""
        event = self.current_event
        if event is None:
            self.bad_bytes += 1  # event doesn't exist, bad byte
            return -1

        parent = event.parent
        # pushed always starts with one placeholder entry, so > 1 means the
        # event actually received children; empty events are dropped.
        if len(event.pushed) > 1:
            self.event_count += 1
            if parent is not None:
                parent.pushed.append(event)
            else:
                self.top_level.append(event)
        self.current_event = parent
        self.total_events += 1
        return None

    def scan(self):
        """Parse from the current position up to ``file_size``.

        ``file_size`` is not refreshed here; call ``size()`` first. When
        scanning starts at offset 0 the file header is read into ``version``
        and ``timestamp``; if the header is not completely available yet the
        stream is rewound and nothing is parsed. Update callbacks run after
        every parsed block.

        Raises:
            Exception: if the reader was created with ``seekable=False``.
        """
        if not self.seekable:
            raise Exception("attempting to scan non-seekable stream")

        if self.pos() == 0:  # start of file: grab version and timestamp
            header = self.read(9)
            if len(header) < 9:
                self.seek(0)  # incomplete header, retry on a later scan
                return
            self.version = header[0]
            self.timestamp = ulonglong(header[1:9])

        while self.pos() < self.file_size:  # while the seeker is before EOF
            chunk = self.read(1)
            if not chunk:  # stream is shorter than the recorded file_size
                break
            self.parse_block(chunk[0])  # parse block based on byte read
            self.update()  # trigger callbacks
247 |
|
248 |
|