You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

151 lines
4.3 KiB

1 month ago
  1. #!/usr/bin/env python3
  2. """
  3. strip_asm.py - Cleanup ASM output for the specified file
  4. """
  5. from argparse import ArgumentParser
  6. import sys
  7. import os
  8. import re
  9. def find_used_labels(asm):
  10. found = set()
  11. label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
  12. for l in asm.splitlines():
  13. m = label_re.match(l)
  14. if m:
  15. found.add('.L%s' % m.group(1))
  16. return found
  17. def normalize_labels(asm):
  18. decls = set()
  19. label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
  20. for l in asm.splitlines():
  21. m = label_decl.match(l)
  22. if m:
  23. decls.add(m.group(0))
  24. if len(decls) == 0:
  25. return asm
  26. needs_dot = next(iter(decls))[0] != '.'
  27. if not needs_dot:
  28. return asm
  29. for ld in decls:
  30. asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm)
  31. return asm
  32. def transform_labels(asm):
  33. asm = normalize_labels(asm)
  34. used_decls = find_used_labels(asm)
  35. new_asm = ''
  36. label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
  37. for l in asm.splitlines():
  38. m = label_decl.match(l)
  39. if not m or m.group(0) in used_decls:
  40. new_asm += l
  41. new_asm += '\n'
  42. return new_asm
  43. def is_identifier(tk):
  44. if len(tk) == 0:
  45. return False
  46. first = tk[0]
  47. if not first.isalpha() and first != '_':
  48. return False
  49. for i in range(1, len(tk)):
  50. c = tk[i]
  51. if not c.isalnum() and c != '_':
  52. return False
  53. return True
  54. def process_identifiers(l):
  55. """
  56. process_identifiers - process all identifiers and modify them to have
  57. consistent names across all platforms; specifically across ELF and MachO.
  58. For example, MachO inserts an additional understore at the beginning of
  59. names. This function removes that.
  60. """
  61. parts = re.split(r'([a-zA-Z0-9_]+)', l)
  62. new_line = ''
  63. for tk in parts:
  64. if is_identifier(tk):
  65. if tk.startswith('__Z'):
  66. tk = tk[1:]
  67. elif tk.startswith('_') and len(tk) > 1 and \
  68. tk[1].isalpha() and tk[1] != 'Z':
  69. tk = tk[1:]
  70. new_line += tk
  71. return new_line
  72. def process_asm(asm):
  73. """
  74. Strip the ASM of unwanted directives and lines
  75. """
  76. new_contents = ''
  77. asm = transform_labels(asm)
  78. # TODO: Add more things we want to remove
  79. discard_regexes = [
  80. re.compile("\s+\..*$"), # directive
  81. re.compile("\s*#(NO_APP|APP)$"), #inline ASM
  82. re.compile("\s*#.*$"), # comment line
  83. re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive
  84. re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
  85. ]
  86. keep_regexes = [
  87. ]
  88. fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
  89. for l in asm.splitlines():
  90. # Remove Mach-O attribute
  91. l = l.replace('@GOTPCREL', '')
  92. add_line = True
  93. for reg in discard_regexes:
  94. if reg.match(l) is not None:
  95. add_line = False
  96. break
  97. for reg in keep_regexes:
  98. if reg.match(l) is not None:
  99. add_line = True
  100. break
  101. if add_line:
  102. if fn_label_def.match(l) and len(new_contents) != 0:
  103. new_contents += '\n'
  104. l = process_identifiers(l)
  105. new_contents += l
  106. new_contents += '\n'
  107. return new_contents
  108. def main():
  109. parser = ArgumentParser(
  110. description='generate a stripped assembly file')
  111. parser.add_argument(
  112. 'input', metavar='input', type=str, nargs=1,
  113. help='An input assembly file')
  114. parser.add_argument(
  115. 'out', metavar='output', type=str, nargs=1,
  116. help='The output file')
  117. args, unknown_args = parser.parse_known_args()
  118. input = args.input[0]
  119. output = args.out[0]
  120. if not os.path.isfile(input):
  121. print(("ERROR: input file '%s' does not exist") % input)
  122. sys.exit(1)
  123. contents = None
  124. with open(input, 'r') as f:
  125. contents = f.read()
  126. new_contents = process_asm(contents)
  127. with open(output, 'w') as f:
  128. f.write(new_contents)
  129. if __name__ == '__main__':
  130. main()
  131. # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
  132. # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
  133. # kate: indent-mode python; remove-trailing-spaces modified;