1 | # lognestmonster Copyright (c) 2019 Joshua 'joshuas3' Stockin |
2 | # <https://github.com/JoshuaS3/lognestmonster/>. |
3 |
|
4 |
|
5 | # This file is part of lognestmonster. |
6 |
|
7 | # lognestmonster is free software: you can redistribute it and/or modify |
8 | # it under the terms of the GNU General Public License as published by |
9 | # the Free Software Foundation, either version 3 of the License, or |
10 | # (at your option) any later version. |
11 |
|
12 | # lognestmonster is distributed in the hope that it will be useful, |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | # GNU General Public License for more details. |
16 |
|
17 | # You should have received a copy of the GNU General Public License |
18 | # along with lognestmonster. If not, see <https://www.gnu.org/licenses/>. |
19 |
|
20 | import struct |
21 | import os |
22 | import resource |
23 | import time |
24 | import sys |
25 |
|
# Marker byte values that Reader.scan compares each record-delimiting byte
# against while walking a log file.
STATEMENT_START, STATEMENT_END = 0, 1
EVENT_START, EVENT_END = 2, 3

# Verbosity names keyed by their numeric level (0 through 5).
# NOTE(review): presumably matches the verbosity byte stored in each
# statement -- confirm against the writer side.
VERBOSITY_LEVELS = dict(enumerate((
    "INIT",
    "DEBUG",
    "VERBOSE",
    "VERYVERBOSE",
    "WARNING",
    "ERROR",
)))
39 |
|
def ulonglong(bytestr):
    """Decode *bytestr* as one native unsigned long long (struct "@Q").

    Raises struct.error when the buffer is not exactly the size of that
    type, and TypeError when *bytestr* is not a bytes-like object.
    """
    (value,) = struct.unpack("@Q", bytestr)
    return value
def uchar(charv):
    """Decode the single byte *charv* as an unsigned char (struct "@B").

    Raises struct.error when *charv* is not exactly one byte long, and
    TypeError when it is not a bytes-like object.
    """
    (value,) = struct.unpack("@B", charv)
    return value
def ushort(shortv):
    """Decode *shortv* as one native unsigned short (struct "@H").

    Raises struct.error when the buffer is not exactly the size of that
    type, and TypeError when *shortv* is not a bytes-like object.
    """
    (value,) = struct.unpack("@H", shortv)
    return value
46 |
|
47 |
|
class EventProto:
    """In-memory stand-in for an event while Reader.scan walks a file.

    parent -- the EventProto this event is nested in, or None.
    pushed -- list that starts as [False]; Reader.scan appends the file
              offsets of contained statements and any closed child
              events to it, and treats a length above 1 as "has content".
              NOTE(review): the purpose of the leading False entry is
              not visible in this file -- confirm with its consumers.
    """

    # Class-level defaults; __init__ rebinds both on every instance, so
    # instances never share the list below.
    parent, pushed = None, [False]

    def __init__(self):
        self.pushed = [False]
        self.parent = None
54 |
|
class Reader:
    """Index the statements and events stored in a log tree file.

    The reader wraps an already-open, seekable binary file object.
    ``scan`` walks the file from the current position, recording where
    each statement starts and how events nest; message bodies are
    skipped rather than loaded.

    Attributes:
        fd: the wrapped file object.
        version, timestamp: header values read from offset 0 by ``scan``.
        top_level: statement offsets (int) and EventProto objects that
            are not nested inside another event.
        event_count, statement_count: totals of indexed items.
        file_size: file length as of the last ``size()`` call; ``scan``
            never reads past it, so call ``size()`` before scanning.
        position: last offset recorded by ``pos()`` or ``seek()``.
        bad_bytes: number of bytes that could not be interpreted.
        filter_time_start, filter_time_end: exclusive bounds on the
            statement timestamp, or -1 to disable.
        filter_verbosity: container of accepted verbosity values, or -1.
        filter_tag: exact tag string to keep, or -1.
    """

    # Class-level defaults mirror what __init__ assigns per instance.
    fd = None

    version = 0
    timestamp = 0

    top_level = []

    event_count = 0
    statement_count = 0

    file_size = 0
    position = 0
    bad_bytes = 0

    filter_time_start = -1
    filter_time_end = -1
    filter_verbosity = -1
    filter_tag = -1

    def __init__(self, fd):
        """Wrap *fd* and reset all counters and filters to their defaults."""
        self.fd = fd

        self.version = 0
        self.timestamp = 0

        self.top_level = []

        self.event_count = 0
        self.statement_count = 0

        self.file_size = 0
        self.position = 0
        self.bad_bytes = 0

        self.filter_time_start = -1
        self.filter_time_end = -1
        self.filter_verbosity = -1
        self.filter_tag = -1

    def size(self):
        """Refresh ``file_size`` and return True if it differs from before.

        The file position is restored to ``self.position`` afterwards.
        """
        self.fd.seek(0, os.SEEK_END)  # jump to EOF to learn the length
        newsize = self.fd.tell()
        self.fd.seek(self.position)  # return to the recorded position

        # Value comparison: identity ("is not") on ints is only reliable
        # for small cached values, so real file sizes always looked changed.
        is_diff = self.file_size != newsize
        self.file_size = newsize
        return is_diff

    def pos(self):
        """Record the file's current offset in ``position`` and return it."""
        p = self.fd.tell()
        self.position = p
        return p

    def seek(self, position):
        """Move the file to absolute offset *position* and record it."""
        self.position = position
        self.fd.seek(self.position)

    def read(self, byte_count):
        """Return exactly *byte_count* bytes, or False on a short read.

        The file offset still advances by however many bytes were
        actually available. ``self.position`` is not updated here.
        """
        data = self.fd.read(byte_count)
        if len(data) == byte_count:
            return data
        return False

    def _read_header(self):
        """Parse the version byte and timestamp at the start of the file.

        Returns True on success. When fewer than 9 bytes are available the
        file is rewound to offset 0 and False is returned, so a later scan
        can retry once the header is complete.
        """
        header = self.read(9)
        if header is False:
            self.seek(0)
            return False
        self.version = uchar(header[:1])
        self.timestamp = ulonglong(header[1:])
        return True

    def _skim_statement(self, this_position):
        """Walk one statement body and report whether it passes the filters.

        *this_position* is the offset of the first byte after the
        STATEMENT_START marker. The offsets used below imply the layout:
        timestamp (8 bytes), verbosity (1), tag size (1), tag,
        message size (2), message, STATEMENT_END marker.

        Any decoding failure (short read, bad UTF-8, ...) propagates to
        the caller, which counts the statement as corrupt.
        """
        timestamp = ulonglong(self.read(8))

        append = True

        if self.filter_time_start != -1 and append:
            append = timestamp > self.filter_time_start

        if self.filter_time_end != -1 and append:
            append = timestamp < self.filter_time_end

        if self.filter_verbosity != -1 and append:
            verbosity = uchar(self.read(1))
            append = verbosity in self.filter_verbosity
        else:
            self.seek(this_position + 9)  # skip the verbosity byte

        tag_size = uchar(self.read(1))
        if self.filter_tag != -1 and append:
            # NOTE(review): a statement with an empty tag is kept even
            # when a tag filter is set -- confirm this is intended.
            if tag_size > 0:
                tag = self.read(tag_size).decode("utf-8")
                append = tag == self.filter_tag
        else:
            self.seek(this_position + 10 + tag_size)  # skip the tag

        message_size = ushort(self.read(2))
        # The message text is not needed for indexing; jump over it.
        self.seek(this_position + 12 + tag_size + message_size)

        # Anything between the message and the end marker is garbage.
        # Calling pos() per byte is affordable because most files are
        # not corrupt.
        while uchar(self.read(1)) != STATEMENT_END and self.pos() < self.file_size:
            self.bad_bytes += 1

        return append

    def _close_event(self, event):
        """Finish *event* and return the event that becomes current.

        An event that received at least one item is counted and attached
        to its parent, or to ``top_level`` when it has none. An empty
        event is dropped. Either way the parent (possibly None) becomes
        the current event.
        """
        parent = event.parent
        if len(event.pushed) > 1:
            self.event_count += 1
            if parent is not None:
                parent.pushed.append(event)
            else:
                self.top_level.append(event)
        return parent

    def scan(self):
        """Scan statements and events from ``position`` up to ``file_size``.

        Prints the active filters before the scan and a summary after it.
        Updates ``top_level``, ``statement_count``, ``event_count`` and
        ``bad_bytes``; when starting at offset 0 it also reads ``version``
        and ``timestamp`` from the header. If the header is incomplete
        nothing is indexed and the position stays at 0.
        """
        print()
        print("beginning file scan")
        if self.filter_time_start != -1:
            print("filter time start: {0}".format(self.filter_time_start))
        if self.filter_time_end != -1:
            print("filter time end: {0}".format(self.filter_time_end))
        if self.filter_verbosity != -1:
            print("filter verbosity: {0}".format(self.filter_verbosity))
        if self.filter_tag != -1:
            print("filter tag: {0}".format(self.filter_tag))
        print("file size: {0}".format(self.file_size))
        print()

        started = time.time()

        header_ok = True
        if self.position == 0:  # start of file: grab version and timestamp
            header_ok = self._read_header()

        if header_ok:
            current_event = None

            while self.pos() < self.file_size:  # seeker is before EOF
                raw = self.read(1)
                if raw is False:
                    # file_size is stale (file shorter than recorded)
                    break
                in_byte = uchar(raw)

                if in_byte == STATEMENT_START:
                    # self.position was refreshed by pos() above and
                    # points at the marker; the statement body follows.
                    this_position = self.position + 1

                    try:
                        append = self._skim_statement(this_position)
                    except Exception:
                        # Corrupt or truncated statement: count every
                        # byte consumed so far as bad and carry on.
                        self.bad_bytes += self.pos() - this_position
                    else:
                        if append:
                            self.statement_count += 1
                            if current_event is not None:
                                current_event.pushed.append(this_position)
                            else:
                                self.top_level.append(this_position)

                elif in_byte == EVENT_START:
                    # Open a new event nested in the current one (if any).
                    new_event = EventProto()
                    new_event.parent = current_event
                    current_event = new_event

                elif in_byte == EVENT_END:
                    if current_event is None:
                        # no open event: this end byte is out of place
                        self.bad_bytes += 1
                    else:
                        current_event = self._close_event(current_event)

                else:  # unknown byte, not in statement or event
                    self.bad_bytes += 1

        print("finished reading, {0} bad bytes".format(self.bad_bytes))
        print()
        print("tree version: {0}".format(self.version))
        print("tree timestamp: {0}".format(self.timestamp))
        print()
        print("event count: {0}".format(self.event_count))
        print("statement count: {0}".format(self.statement_count))
        print()
        print("time elapsed: {0}".format(time.time() - started))
227 |
|
228 |
|