# lognestmonster Copyright (c) 2019 Joshua 'joshuas3' Stockin
# <https://github.com/JoshuaS3/lognestmonster/>.


# This file is part of lognestmonster.

# lognestmonster is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# lognestmonster is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with lognestmonster. If not, see <https://www.gnu.org/licenses/>.
| 19 |
|
| 20 | import struct |
| 21 | import os |
| 22 | import resource |
| 23 | import time |
| 24 | import sys |
| 25 |
|
# Single-byte record markers. Reader.scan compares each byte it reads from the
# log file against these values to find statement and event boundaries.
STATEMENT_START = 0
STATEMENT_END = 1
EVENT_START = 2
EVENT_END = 3
| 30 |
|
# Maps a numeric verbosity level to its display name.
VERBOSITY_LEVELS = {
    0: "INIT",
    1: "DEBUG",
    2: "VERBOSE",
    3: "VERYVERBOSE",
    4: "WARNING",
    5: "ERROR",
}
| 39 |
|
def ulonglong(bytestr: bytes) -> int:
    """Decode *bytestr* as one unsigned long long and return it as an int.

    The ``"@Q"`` format uses the machine's native byte order, size and
    alignment, so the buffer must be exactly ``struct.calcsize("@Q")`` bytes
    long; ``struct.unpack`` raises for any other length.
    """
    return struct.unpack("@Q", bytestr)[0]
def uchar(charv: bytes) -> int:
    """Decode the single byte *charv* as an unsigned char (0-255).

    ``struct.unpack`` raises when *charv* is not exactly one byte long.
    """
    return struct.unpack("@B", charv)[0]
def ushort(shortv: bytes) -> int:
    """Decode *shortv* as one unsigned short and return it as an int.

    The ``"@H"`` format uses the machine's native byte order and size, so the
    buffer must be exactly ``struct.calcsize("@H")`` bytes long;
    ``struct.unpack`` raises for any other length.
    """
    return struct.unpack("@H", shortv)[0]
| 46 |
|
| 47 |
|
class EventProto:
    """Lightweight record for one event while a log file is being scanned.

    Attributes:
        parent: The enclosing EventProto, or None for an event that is not
            nested inside another one.
        pushed: The items collected for this event. The list always starts
            with a ``False`` placeholder, so an event that received nothing
            has ``len(pushed) == 1``.
    """

    # Class-level defaults are kept for backward compatibility. __init__
    # rebinds both names so that instances never share the mutable list.
    parent = None
    pushed = [False]

    def __init__(self):
        self.parent = None
        self.pushed = [False]
| 54 |
|
class Reader:
    """Index the statements and events stored in a binary log file.

    The layout this class reads is: a 1-byte version and an 8-byte timestamp
    at offset 0, followed by a stream of marker bytes. A statement is
    STATEMENT_START, an 8-byte timestamp, a 1-byte verbosity, a 1-byte tag
    size, the tag, a 2-byte message size, the message, and STATEMENT_END.
    Events are delimited by EVENT_START / EVENT_END and may nest.

    Attributes:
        fd: Seekable binary file object the log is read from.
        version, timestamp: Header values read by scan() at offset 0.
        top_level: Items that are not inside any event: statement offsets
            (the position just after the start marker) and EventProto objects.
        event_count, statement_count: Number of indexed events / statements.
        file_size: Size recorded by the last size() call.
        position: Last offset recorded by pos() or seek().
        bad_bytes: Number of bytes that could not be interpreted.
        filter_time_start, filter_time_end: Exclusive timestamp bounds, or -1
            to disable.
        filter_verbosity: Container of accepted verbosity values, or -1 to
            disable.
        filter_tag: Tag string a statement must carry, or -1 to disable.
    """

    # Class-level defaults; __init__ rebinds every one of them per instance.
    fd = None

    version = 0
    timestamp = 0

    top_level = []

    event_count = 0
    statement_count = 0

    file_size = 0
    position = 0
    bad_bytes = 0

    filter_time_start = -1
    filter_time_end = -1
    filter_verbosity = -1
    filter_tag = -1

    def __init__(self, fd):
        """Create a reader over *fd* with empty state and all filters off."""
        self.fd = fd

        self.version = 0
        self.timestamp = 0

        self.top_level = []

        self.event_count = 0
        self.statement_count = 0

        self.file_size = 0
        self.position = 0
        self.bad_bytes = 0

        self.filter_time_start = -1
        self.filter_time_end = -1
        self.filter_verbosity = -1
        self.filter_tag = -1

    def size(self):
        """Refresh file_size and return True when it differs from the old value."""
        self.fd.seek(0, os.SEEK_END)  # go to end of file and get position
        newsize = self.fd.tell()
        self.fd.seek(self.position)  # return to previous position

        # Compare by value. An identity test (`is not`) on ints is True for
        # two equal values held in distinct objects, which made every call
        # report a change for files larger than the small-int cache.
        is_diff = self.file_size != newsize
        self.file_size = newsize
        return is_diff

    def pos(self):
        """Record the file object's current offset in position and return it."""
        p = self.fd.tell()
        self.position = p
        return p

    def seek(self, position):
        """Move the file object to *position* and record it in position."""
        self.position = position
        self.fd.seek(self.position)

    def read(self, byte_count):
        """Read exactly *byte_count* bytes.

        Returns the bytes read, or False when fewer than *byte_count* bytes
        were available. Does not update position.
        """
        data = self.fd.read(byte_count)
        if len(data) == byte_count:
            return data
        return False

    def _print_scan_banner(self):
        """Print the active filters and the recorded file size."""
        print()
        print("beginning file scan")
        if self.filter_time_start != -1:
            print("filter time start: {0}".format(self.filter_time_start))
        if self.filter_time_end != -1:
            print("filter time end: {0}".format(self.filter_time_end))
        if self.filter_verbosity != -1:
            print("filter verbosity: {0}".format(self.filter_verbosity))
        if self.filter_tag != -1:
            print("filter tag: {0}".format(self.filter_tag))
        print("file size: {0}".format(self.file_size))
        print()

    def _consume_statement(self, this_position):
        """Walk over one statement and report whether it passes the filters.

        *this_position* is the offset just after the STATEMENT_START marker.
        On return the file object sits after the statement's end marker.
        Raises whatever the decoding helpers raise when the data runs out or
        is malformed; scan() treats that as a corrupt statement.
        """
        timestamp = ulonglong(self.read(8))

        append = True
        if self.filter_time_start != -1:
            append = timestamp > self.filter_time_start
        if self.filter_time_end != -1 and append:
            append = timestamp < self.filter_time_end

        if self.filter_verbosity != -1 and append:
            verbosity = uchar(self.read(1))
            append = verbosity in self.filter_verbosity
        else:
            self.seek(this_position + 9)  # skip the verbosity byte

        tag_size = uchar(self.read(1))
        if self.filter_tag != -1 and append:
            # NOTE(review): a statement with an empty tag passes the tag
            # filter unchanged -- confirm that this is intended.
            if tag_size > 0:
                tag = self.read(tag_size).decode("utf-8")
                append = tag == self.filter_tag
        else:
            self.seek(this_position + 10 + tag_size)  # skip the tag

        message_size = ushort(self.read(2))
        self.seek(this_position + 12 + tag_size + message_size)  # ignore the message

        # Anything between the message and the end marker is counted as bad.
        # We can afford self.pos() here because most files aren't corrupt.
        while uchar(self.read(1)) != STATEMENT_END and self.pos() < self.file_size:
            self.bad_bytes += 1

        return append

    def scan(self):
        """Scan for events and statements from position to the end of file.

        Reads the version/timestamp header when starting at offset 0, then
        indexes every statement that passes the active filters and every
        event that ends up holding at least one item. Results go to
        top_level, statement_count, event_count and bad_bytes, and a summary
        is printed. file_size must already be current (see size()).

        NOTE(review): when the file is shorter than the 9-byte header, read()
        returns False and the decoding helper raises -- confirm whether
        callers rely on that.
        """
        self._print_scan_banner()

        started = time.time()

        if self.position == 0:  # start of the file: grab version and timestamp
            self.version = uchar(self.read(1))
            self.timestamp = ulonglong(self.read(8))

        current_event = None

        while self.pos() < self.file_size:  # while the seeker is before EOF
            in_byte = uchar(self.read(1))

            if in_byte == STATEMENT_START:
                # self.position still holds the marker's offset (set by the
                # loop condition), so the statement body starts one byte on.
                this_position = self.position + 1
                try:
                    append = self._consume_statement(this_position)
                except Exception:
                    # Best effort on corrupt data: count everything consumed
                    # so far as bad and carry on scanning from here.
                    self.bad_bytes += self.pos() - this_position
                else:
                    if append:
                        self.statement_count += 1
                        if current_event is not None:
                            current_event.pushed.append(this_position)
                        else:
                            self.top_level.append(this_position)

            elif in_byte == EVENT_START:
                new_event = EventProto()
                if current_event is not None:  # nest inside the open event
                    new_event.parent = current_event
                current_event = new_event

            elif in_byte == EVENT_END:
                if current_event is None:
                    # No open event: this end byte is out of place.
                    self.bad_bytes += 1
                else:
                    parent = current_event.parent
                    # pushed always starts with one placeholder entry, so a
                    # length above 1 means the event holds something.
                    if len(current_event.pushed) > 1:
                        self.event_count += 1
                        if parent is not None:
                            parent.pushed.append(current_event)
                        else:
                            self.top_level.append(current_event)
                    # Empty events are dropped; either way the parent (or
                    # None) becomes the current event again.
                    current_event = parent

            else:  # unknown byte, not in statement or event
                self.bad_bytes += 1

        print("finished reading, {0} bad bytes".format(self.bad_bytes))
        print()
        print("tree version: {0}".format(self.version))
        print("tree timestamp: {0}".format(self.timestamp))
        print()
        print("event count: {0}".format(self.event_count))
        print("statement count: {0}".format(self.statement_count))
        print()
        print("time elapsed: {0}".format(time.time() - started))
| 227 |
|
| 228 |
|