submit-condense.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. """Condense manuscript into single files, often preferred by journals.
  2. Usage: submit-condense.py [--source SOURCE] [--dest DEST]
  3. """
  4. import argparse
  5. import pathlib
  6. import shutil
  7. import re
# Regular expressions used to locate and strip LaTeX constructs.
STYLE_RE = re.compile(r"\\def\\style{\d}\s") # "\def\style{<digit>}" plus one trailing whitespace char
INPUT_RE = re.compile(r"\\input{.*}") # an input statement (greedy up to the last "}" on the line)
ARGUMENT_RE = re.compile(r"(?<={)[\s\S]*(?=})") # everything between the first "{" and the last "}"
# NOTE: "[^def]" is a character class: it rejects "\if" followed by any single
# character among d, e, f (not only the literal "\ifdef"), and it consumes
# the character that follows "\if".
IF_RE = re.compile(r"\\if[^def][\s\S]*?\\fi") # shortest if ... fi section
ELSE_RE = re.compile(r"\\else[\s\S]*?\\fi") # shortest else ... fi section
IF_ARG_RE = re.compile(r"(?<=\\if\\style)\d") # the digit compared against in "\if\style<digit>"
IF_ARG_STRIP = re.compile(r"\\if\\\w*\s*|\\fi") # the if/fi edges
ELSE_ARG_STRIP = re.compile(r"\\else|\\fi") # the else/fi edges
META_RE = re.compile(r"% !TEX .*") # tex meta commands
IFDEF_RE = re.compile(r"\\ifdef") # the "\ifdef" keyword itself (arguments are parsed by hand)
NEWCOMM_RE = re.compile(r"\\newcommand") # the "\newcommand" keyword itself (arguments are parsed by hand)
# Maps the style digit to a short name; presumably the template/journal
# family for each style (verify against the templates directory).
# Not referenced by the code visible in this file.
style_d = {
    '0': 'pi',
    '1': 'els',
    '2': 'rsc',
    '3': 'acs',
}
  25. def check(directory, filename):
  26. for path in directory.iterdir():
  27. if path.name == filename:
  28. return True
  29. return False
  30. def get_style(text):
  31. """Figure out style, then remove from file."""
  32. style_match = STYLE_RE.search(text)
  33. if not style_match:
  34. return None, text
  35. style = ARGUMENT_RE.search(style_match.group()).group()
  36. text = STYLE_RE.sub("", text)
  37. return style, text
  38. def find_close_bracket(string):
  39. """finds the closing of a scope, taking into account nesting"""
  40. nest = 1
  41. for ind, char in enumerate(string):
  42. if char == '{':
  43. nest += 1
  44. elif char == '}':
  45. nest -= 1
  46. if nest == 0:
  47. return ind
  48. def sort_style_ifs(text, style):
  49. r"""
  50. Deal with any if/else/fi flags.
  51. The if/else types to replace are only '\if\style'
  52. """
  53. def repl_ifs(if_section):
  54. if_text = if_section.group()
  55. if_style = IF_ARG_RE.search(if_text) # find if in group
  56. if not if_style:
  57. raise Exception(if_text)
  58. else_match = ELSE_RE.search(if_text)
  59. if else_match:
  60. if_text = if_text[0:else_match.start()]
  61. else_text = else_match.group()
  62. if style == if_style.group():
  63. return IF_ARG_STRIP.sub("", if_text)
  64. else:
  65. if else_match:
  66. return ELSE_ARG_STRIP.sub("", else_text)
  67. return ""
  68. return IF_RE.sub(repl_ifs, text)
  69. def sort_other_ifs(text):
  70. r"""
  71. Deal with other if flags.
  72. # The types to replace are either \ifdef or \if@switch
  73. """
  74. def repl_ifswitch(if_section):
  75. if_text = if_section.group()
  76. if if_text.startswith("\\if@switch"):
  77. if text.startswith("% Master SI"):
  78. return ""
  79. else_match = ELSE_RE.search(if_text)
  80. if_text = if_text[0:else_match.start()]
  81. else_text = else_match.group()
  82. return ELSE_ARG_STRIP.sub("", else_text)
  83. else:
  84. raise Exception
  85. text = IF_RE.sub(repl_ifswitch, text)
  86. hit = IFDEF_RE.search(text)
  87. while hit:
  88. arg1_o = hit.end() + 1
  89. arg1_e = arg1_o + find_close_bracket(text[arg1_o:])
  90. arg2_o = arg1_e + 2
  91. arg2_e = arg2_o + find_close_bracket(text[arg2_o:])
  92. arg3_o = arg2_e + 2
  93. arg3_e = arg3_o + find_close_bracket(text[arg3_o:])
  94. if text.find(f"\\newcommand{{{text[arg1_o:arg1_e]}}}") != -1:
  95. text = text[:hit.start()] + text[arg2_o:arg2_e] + text[arg3_e + 1:]
  96. else:
  97. text = text[:hit.start()] + text[arg3_o:arg3_e] + text[arg3_e + 1:]
  98. hit = IFDEF_RE.search(text)
  99. return text
  100. def expand(text):
  101. def repl_input(inp_section):
  102. input_text = inp_section.group()
  103. filename = ARGUMENT_RE.search(input_text)
  104. if not filename:
  105. raise Exception
  106. filepath = pathlib.Path("./" + filename.group() + '.tex')
  107. with open(filepath, 'r', encoding="utf-8") as file:
  108. return file.read()
  109. return INPUT_RE.subn(repl_input, text)
  110. def expand_meta(text):
  111. replaced = [
  112. r'\pubauth',
  113. r'\pubaffil',
  114. r'\pubaddr',
  115. r'\orcid',
  116. r'\pubkeywords',
  117. r'\pubSI',
  118. r'\pubtitle',
  119. r'\dg',
  120. r'\eqcontrib',
  121. r'\authemail',
  122. r'\pubemail',
  123. ]
  124. for r in replaced:
  125. replaced_vals = []
  126. for hit in NEWCOMM_RE.finditer(text):
  127. arg1_o = hit.end() + 1
  128. arg1_e = arg1_o + find_close_bracket(text[arg1_o:])
  129. if text[arg1_e + 1] == "{":
  130. arg2_o = arg1_e + 2
  131. arg2_e = arg2_o + find_close_bracket(text[arg2_o:])
  132. command = text[arg1_o:arg1_e]
  133. value = text[arg2_o:arg2_e]
  134. if command.startswith(r):
  135. replaced_vals.append([command, value, hit.start(), arg2_e + 1])
  136. text = ''.join([
  137. chr for idx, chr in enumerate(text, 1) if not any(
  138. strt_idx <= idx <= end_idx
  139. for _, _, strt_idx, end_idx in replaced_vals
  140. )
  141. ])
  142. for val in replaced_vals:
  143. text = text.replace(val[0], val[1])
  144. return text
  145. def clean(text, style):
  146. # remove meta commands
  147. text = META_RE.sub("", text)
  148. # remove advanced tex lines
  149. text = re.sub(r"\\makeatletter", r'', text)
  150. text = re.sub(r"\\makeatother", r'', text)
  151. # pandoc unnecessary commands
  152. text = text.replace(
  153. r"\newenvironment{widefigure}{\begin{figure*}}{\end{figure*}}", ""
  154. )
  155. text = text.replace("widefigure", "figure*")
  156. text = text.replace(
  157. r"\newenvironment{widetable}{\begin{table*}}{\end{table*}}", ""
  158. )
  159. text = text.replace("widetable", "table*")
  160. # remove comment lines
  161. text = re.sub(r'(?<!\\)%.*', r'', text)
  162. # remove superfluous newlines
  163. text = re.sub(r'\n\s*\n', r'\n\n', text)
  164. # update references
  165. text = re.sub(r"(?<=\\bibliography{)refs/biblio(?=})", r'biblio', text)
  166. if style == '0' or style is None:
  167. text = re.sub(r"templates/pi/", r'', text)
  168. elif style == '2':
  169. text = re.sub(r"templates/rsc/", r'', text)
  170. return text
  171. def process_tex(target):
  172. """Process a tex file to return it to a simple 'one file' format."""
  173. with target.open() as file:
  174. # get file contents
  175. filetext = file.read()
  176. # get style
  177. style, filetext = get_style(filetext)
  178. # style if/else processing
  179. if style:
  180. filetext = sort_style_ifs(filetext, style)
  181. # input expansion
  182. expansions = 1
  183. while expansions > 0:
  184. filetext, expansions = expand(filetext)
  185. # second if/else processing
  186. filetext = sort_other_ifs(filetext)
  187. # replace metadata commands
  188. filetext = expand_meta(filetext)
  189. # cleaning
  190. filetext = clean(filetext, style)
  191. return filetext, style
  192. def copy_files(source, target, style):
  193. shutil.copy((source / 'refs' / 'biblio.bib'), (target / 'biblio.bib'))
  194. shutil.copy((source / 'manuscript-SI.aux'), (target / 'manuscript-SI.aux'))
  195. shutil.copy((source / 'manuscript-SI.pdf'), (target / 'manuscript-SI.pdf'))
  196. shutil.copytree((source / 'figs'), (target / 'figs'),
  197. ignore=shutil.ignore_patterns('*.md', '*.txt'))
  198. if style == '0':
  199. shutil.copy((source / 'templates' / 'pi' / 'pi-article.cls'),
  200. (target / 'pi-article.cls'))
  201. shutil.copy((source / 'templates' / 'pi' / 'pi-bib.bst'),
  202. (target / 'pi-bib.bst'))
  203. elif style == '2':
  204. shutil.copy((source / 'templates' / 'rsc' / 'rsc.bst'),
  205. (target / 'rsc.bst'))
  206. shutil.copytree((source / 'templates' / 'rsc' / 'head_foot'),
  207. (target / 'head_foot'))
  208. def main(source_dir, output_dir='./condensed/'):
  209. # define source and output dirs
  210. source_dir = pathlib.Path(source_dir)
  211. output_dir = pathlib.Path(output_dir)
  212. # create and clean output dir
  213. if output_dir.exists():
  214. shutil.rmtree(output_dir)
  215. output_dir.mkdir(parents=True, exist_ok=True)
  216. # the manuscript style (own, RCS, etc)
  217. man_style = None
  218. # condense the manuscript files
  219. files = [
  220. 'manuscript.tex',
  221. 'manuscript-SI.tex',
  222. ]
  223. for filename in files:
  224. if check(source_dir, filename):
  225. target_in = source_dir / filename
  226. target_out = output_dir / filename
  227. processed, style = process_tex(target_in)
  228. if style:
  229. man_style = style
  230. with open(target_out, "w", encoding="utf-8") as file:
  231. file.write(processed)
  232. # copy over other required data
  233. copy_files(source_dir, output_dir, man_style)
  234. if __name__ == "__main__":
  235. parser = argparse.ArgumentParser(
  236. description='Condense manuscript to a single file.'
  237. )
  238. parser.add_argument(
  239. '--source', type=str, default=pathlib.Path(__file__).parent.parent
  240. )
  241. parser.add_argument(
  242. '--dest',
  243. type=str,
  244. default=pathlib.Path(__file__).parent.parent / 'condensed'
  245. )
  246. args = parser.parse_args()
  247. main(args.source, args.dest)