@inproceedings{10.1145/3611643.3616322,
author = {Wang, Bo and Li, Ruishi and Li, Mingkai and Saxena, Prateek},
title = {TransMap: Pinpointing Mistakes in Neural Code Translation},
year = {2023},
isbn = {9798400703270},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3611643.3616322},
doi = {10.1145/3611643.3616322},
abstract = {Automated code translation between programming languages can greatly reduce the human effort needed in learning new languages or in migrating code. Recent neural machine translation models, such as Codex, have been shown to be effective on many code generation tasks including translation. However, code produced by neural translators often has semantic mistakes. These mistakes are difficult to eliminate from the neural translator itself because the translator is a black box, which is difficult to interpret or control compared to rule-based transpilers. We propose the first automated approach to pinpoint semantic mistakes in code obtained after neural code translation. Our techniques are implemented in a prototype tool called TransMap which translates Python to JavaScript, both of which are popular scripting languages. On our created micro-benchmarks of Python programs with 648 semantic mistakes in total, TransMap accurately pinpoints the correct location for a fix for 87.96\%, often highlighting 1-2 lines for the user to inspect per mistake. We report on our experience in translating 5 Python libraries with up to 1k lines of code with TransMap. Our preliminary user study suggests that TransMap can reduce the time for fixing semantic mistakes by around 70\% compared to using a standard IDE with debuggers.},
booktitle = {Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
pages = {999–1011},
numpages = {13},
keywords = {Code Translation, Large Language Models, Semantic Mistakes},
location = {San Francisco, CA, USA},
series = {ESEC/FSE 2023}
}