# This file contains a simple parser that parses the report
# produced by cuda-memcheck.


class ParseError(Exception):
    """Raised whenever the simple parser is unable to parse the report."""


class Report:
    """A container of errors plus the summary of how many errors were found.

    Args:
        text: The summary line with the leading "=========" marker removed,
            e.g. "ERROR SUMMARY: 1 error" or "ERROR SUMMARY: 2 errors".
        errors: The list of ``Error`` objects collected from the report.

    Raises:
        ParseError: If the error count cannot be read from ``text``, or if it
            does not agree with ``len(errors)``.
    """

    # When there are more than 10k errors, cuda-memcheck only displays 10k.
    MAX_DISPLAYED_ERRORS = 10000

    def __init__(self, text, errors):
        self.text = text
        # The count is the third whitespace-separated token of the summary.
        # A missing or non-numeric token means the report is malformed, which
        # must surface as ParseError rather than a bare IndexError/ValueError.
        try:
            self.num_errors = int(text.strip().split()[2])
        except (IndexError, ValueError) as err:
            raise ParseError(
                "Cannot parse error summary: {!r}".format(text)
            ) from err
        self.errors = errors
        if len(errors) != self.num_errors:
            truncated = (
                len(errors) == self.MAX_DISPLAYED_ERRORS
                and self.num_errors > self.MAX_DISPLAYED_ERRORS
            )
            if not truncated:
                raise ParseError("Number of errors does not match")
            # Only the displayed errors are available, so report that many.
            self.num_errors = self.MAX_DISPLAYED_ERRORS


class Error:
    """Each error is a section in the output of cuda-memcheck.
    Each error in the report has an error message and a backtrace. It looks like:

    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    =========     Saved host backtrace up to driver entry point at error
    =========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    =========     Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaGetLastError + 0x163) [0x4c493]
    =========     Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x5b77a05]
    =========     Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x39d6d1d]
    =========     .....

    Args:
        lines: The lines of one section with the leading "=========" marker
            already removed. The first line is the error message, the second
            line is skipped, and the remaining lines form the backtrace.
    """

    def __init__(self, lines):
        self.message = lines[0]
        # lines[1] is the "Saved host backtrace ..." header in the sample
        # above; it is not part of the stack itself.
        self.stack = [frame.strip() for frame in lines[2:]]


def parse(message):
    """A simple parser that parses the report of cuda-memcheck. This parser is meant to be simple
    and it only splits the report into separate errors and a summary, where each error is further
    split into an error message and a backtrace. No further details are parsed.

    A report contains multiple errors and a summary on how many errors are detected. It looks like:

    ========= CUDA-MEMCHECK
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaPointerGetAttributes.
    =========     Saved host backtrace up to driver entry point at error
    =========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    =========     Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaPointerGetAttributes + 0x1a9) [0x428b9]
    =========     Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x5b778a9]
    =========     .....
    =========
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    =========     Saved host backtrace up to driver entry point at error
    =========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    =========     Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaGetLastError + 0x163) [0x4c493]
    =========     .....
    =========
    ========= .....
    =========
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    =========     Saved host backtrace up to driver entry point at error
    =========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    =========     .....
    =========     Host Frame:python (_PyEval_EvalFrameDefault + 0x6a0) [0x1d0ad0]
    =========     Host Frame:python (_PyEval_EvalCodeWithName + 0xbb9) [0x116db9]
    =========
    ========= ERROR SUMMARY: 4 errors

    Args:
        message: The full text output to parse.

    Returns:
        A ``Report`` built from the summary line and the errors found before it.

    Raises:
        ParseError: If no "ERROR SUMMARY:" line is found after the
            "========= CUDA-MEMCHECK" header, or if ``Report`` rejects the
            summary.
    """
    errors = []
    HEAD = "========="
    headlen = len(HEAD)
    started = False
    in_message = False
    message_lines = []
    for line in message.splitlines():
        if line == HEAD + " CUDA-MEMCHECK":
            started = True
            continue
        # Ignore everything before the header and any line that does not
        # carry the "=========" marker.
        if not started or not line.startswith(HEAD):
            continue
        # Drop the marker and the single character that follows it.
        line = line[headlen + 1:]
        if line.startswith("ERROR SUMMARY:"):
            # A section still open at this point is not turned into an Error:
            # only sections terminated by a blank marker line are counted.
            return Report(line, errors)
        if not in_message:
            in_message = True
            message_lines = [line]
        elif line == "":
            # A blank marker line ends the current section.
            errors.append(Error(message_lines))
            in_message = False
        else:
            message_lines.append(line)
    raise ParseError("No error summary found")