building_data_management_systems.Xuanzhou.2024Fall.DaSE
/
leveldb.project


								#!/usr/bin/env python3


								"""

								strip_asm.py - Clean up ASM output for the specified file

								"""


								from argparse import ArgumentParser

								import sys

								import os

								import re


								def find_used_labels(asm):
								    """Return the set of local labels that are targets of jump instructions.

								    Each line of *asm* is matched from its start against: optional
								    whitespace, a mnemonic beginning with ``j`` followed by lowercase
								    letters (``jmp``, ``jne``, ...), whitespace, and an operand of the
								    form ``.L<name>``. Only such jump operands count as a use; any other
								    kind of reference to a label is not detected here.

								    Args:
								        asm: Assembly source text, possibly spanning many lines.

								    Returns:
								        A set of label names, each including its leading ``.L`` prefix.
								    """
								    # Raw string: "\s" and "\." inside a plain literal are invalid escape
								    # sequences, which newer Python versions warn about.
								    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
								    matches = (label_re.match(line) for line in asm.splitlines())
								    return {'.L%s' % m.group(1) for m in matches if m}


								def normalize_labels(asm):
								    """Rewrite dot-less local labels (``L<name>``) as ``.L<name>``.

								    Label declarations are lines that start with ``L<name>:`` or
								    ``.L<name>:``. Every declared label that lacks the leading dot is
								    renamed, both at its declaration and wherever it appears as a
								    whole token: preceded by whitespace (or at the very start of the
								    text) and followed by ``:``, whitespace, or the end of the text.
								    Labels that already carry the dot are left alone, and text without
								    any dot-less declaration is returned unchanged.

								    Args:
								        asm: Assembly source text.

								    Returns:
								        The text with all declared dot-less labels prefixed by ``.``.
								    """
								    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
								    decls = set()
								    for line in asm.splitlines():
								        m = label_decl.match(line)
								        if m:
								            decls.add(m.group(0))

								    # Decide per label instead of inspecting one arbitrary set element,
								    # so the result does not depend on set iteration order.
								    dotless = {name for name in decls if not name.startswith('.')}
								    if not dotless:
								        return asm

								    # One pass over the text: match any whole label-shaped token and
								    # rewrite it only when it is a declared dot-less label. The "$"
								    # alternative covers a reference at the very end of the input that
								    # is not followed by a newline.
								    label_token = re.compile(
								        r"(^|\s+)(L[a-zA-Z0-9][a-zA-Z0-9_]*)(?=:|\s|$)")

								    def add_dot(m):
								        if m.group(2) in dotless:
								            return m.group(1) + '.' + m.group(2)
								        return m.group(0)

								    return label_token.sub(add_dot, asm)


								def transform_labels(asm):
								    """Normalize local labels and drop declarations that are never used.

								    The text is first passed through ``normalize_labels``. Then every
								    line that starts with a ``.L<name>:`` declaration is removed unless
								    ``find_used_labels`` reports that label as used. All other lines
								    are kept as they are.

								    Args:
								        asm: Assembly source text.

								    Returns:
								        The filtered text, with every kept line terminated by a newline.
								    """
								    asm = normalize_labels(asm)
								    used_decls = find_used_labels(asm)
								    # Raw string avoids the invalid "\." escape of a plain literal.
								    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
								    kept = []
								    for line in asm.splitlines():
								        m = label_decl.match(line)
								        if not m or m.group(0) in used_decls:
								            kept.append(line)
								    # Join once instead of growing a string line by line.
								    return ''.join(line + '\n' for line in kept)


								def is_identifier(tk):
								    """Tell whether *tk* has the shape of an identifier.

								    A token qualifies when it is non-empty, its first character is a
								    letter or an underscore, and every remaining character is a letter,
								    a digit, or an underscore.

								    Args:
								        tk: The token to examine.

								    Returns:
								        True if the token qualifies, False otherwise.
								    """
								    if len(tk) == 0:
								        return False
								    head, tail = tk[0], tk[1:]
								    if not (head.isalpha() or head == '_'):
								        return False
								    return all(ch.isalnum() or ch == '_' for ch in tail)


								def process_identifiers(l):
								    """
								    process_identifiers - process all identifiers and modify them to have
								    consistent names across all platforms; specifically across ELF and MachO.
								    For example, MachO inserts an additional underscore at the beginning of
								    names. This function removes that.

								    Rules applied to each run of ``[a-zA-Z0-9_]`` characters in the line:
								      * a token starting with ``__Z`` loses its first underscore;
								      * a token starting with ``_`` followed by a letter other than
								        ``Z`` loses that underscore;
								      * every other token is left untouched.

								    Args:
								        l: A single line of assembly text.

								    Returns:
								        The line with the leading underscores removed as described.
								    """
								    def strip_leading_underscore(match):
								        tk = match.group(0)
								        if tk.startswith('__Z'):
								            return tk[1:]
								        if tk.startswith('_') and len(tk) > 1 and \
								                tk[1].isalpha() and tk[1] != 'Z':
								            return tk[1:]
								        return tk

								    # Every token matched here consists solely of [a-zA-Z0-9_], so one
								    # that starts with '_' is already a well-formed identifier and no
								    # separate validity check is needed before rewriting it.
								    return re.sub(r'[a-zA-Z0-9_]+', strip_leading_underscore, l)


								def process_asm(asm):
								    """
								    Strip the ASM of unwanted directives and lines

								    The text is first run through ``transform_labels``. Each resulting
								    line then has ``@GOTPCREL`` removed and is dropped if it matches any
								    discard pattern, unless a keep pattern also matches it. Kept lines
								    are passed through ``process_identifiers``. A blank line is inserted
								    before every function-style label (``name:`` at the start of a line)
								    except when nothing has been emitted yet.

								    Args:
								        asm: Assembly source text.

								    Returns:
								        The stripped text, with every kept line terminated by a newline.
								    """
								    asm = transform_labels(asm)

								    # TODO: Add more things we want to remove
								    # Raw strings avoid the invalid "\s" / "\." escapes of plain literals.
								    discard_regexes = [
								        re.compile(r"\s+\..*$"),  # directive
								        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
								        re.compile(r"\s*#.*$"),  # comment line
								        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
								        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
								    ]
								    # Patterns that force a line to be kept even when a discard pattern
								    # matched it; currently there are none.
								    keep_regexes = [
								    ]
								    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

								    pieces = []
								    for line in asm.splitlines():
								        # Remove Mach-O attribute
								        line = line.replace('@GOTPCREL', '')
								        discarded = any(reg.match(line) for reg in discard_regexes)
								        forced = any(reg.match(line) for reg in keep_regexes)
								        if discarded and not forced:
								            continue
								        # Separate functions with a blank line, but never start the
								        # output with one.
								        if fn_label_def.match(line) and pieces:
								            pieces.append('\n')
								        pieces.append(process_identifiers(line) + '\n')
								    # Join once instead of growing a string line by line.
								    return ''.join(pieces)


								def main():
								    """Command-line entry point: strip one assembly file into another.

								    Takes two positional arguments, the input assembly file and the
								    output file. Prints an error and exits with status 1 when the input
								    is not an existing file; otherwise reads it, runs the text through
								    ``process_asm``, and writes the result to the output path.
								    """
								    cli = ArgumentParser(description='generate a stripped assembly file')
								    cli.add_argument('input', metavar='input', type=str, nargs=1,
								                     help='An input assembly file')
								    cli.add_argument('out', metavar='output', type=str, nargs=1,
								                     help='The output file')
								    # Arguments the parser does not recognize are accepted and ignored.
								    args, _ = cli.parse_known_args()

								    # nargs=1 makes each positional a one-element list.
								    src_path = args.input[0]
								    dst_path = args.out[0]

								    if not os.path.isfile(src_path):
								        print("ERROR: input file '%s' does not exist" % src_path)
								        sys.exit(1)

								    with open(src_path, 'r') as src:
								        original_text = src.read()
								    stripped_text = process_asm(original_text)
								    with open(dst_path, 'w') as dst:
								        dst.write(stripped_text)


								# Run the command-line entry point only when this file is executed as a
								# script, not when it is imported as a module.
								if __name__ == '__main__':

								    main()


								# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4

								# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;

								# kate: indent-mode python; remove-trailing-spaces modified;