Source code for greater_tables.data.tex_list

"""
Find and process blobs of TeX.

Change target directory to find other blobs.
"""

from pathlib import Path
import re
import subprocess

import pandas as pd


class TeXMacros():
    r"""
    A class for dealing with TeX macros.

    Made out of PublisherBase in blog_tools.py from great2.blog.

    ``_macros`` holds one ``\def\name{body}`` definition per line. All
    methods are static, so the class is used as a namespace and never needs
    to be instantiated.
    """

    # One ``\def`` per line: tex_to_dict splits this text on newlines.
    # A name defined twice (e.g. \ecirc) keeps its last definition.
    _macros = r"""\def\AA{\mathcal{A}}
\def\atan{\mathrm{atan}}
\def\A{\mathcal{A}}
\def\B{\mathcal{B}}
\def\BB{\mathbb{B}}
\def\AVaR{\mathsf{AVaR}}
\def\bbeta{\mathbf{\beta}}
\def\bb{\mathbf b}
\def\bfx{\mathbf x}
\def\bm{\mathbf }
\def\biTVaR{\mathsf{biTVaR}}
\def\corr{\mathsf{Corr}}
\def\cov{\mathsf{cov}}
\def\cp{\mathsf{CP}}
\def\CTE{\mathsf{CTE}}
\def\CVaR{\mathsf{CVaR}}
\def\dint{\displaystyle\int}
\def\dsum{\displaystyle\sum}
\def\ecirc{\accentset{\circ} e}
\def\ecirc{\accentset{\circ} e}
\def\EPD{\mathsf{EPD}}
\def\ES{\mathsf{ES}}
\def\esssup{\mathrm{ess\,sup}}
\def\E{\mathsf{E}}
\def\F{\mathscr{F}}
\def\FFF{\mathscr{F}}
\def\FF{\mathcal{F}}
\def\G{\mathscr{G}}
\def\HH{\mathbf{H}}
\def\kpx{{{}_kp_x}}
\def\MM{\mathcal{M}}
\def\NN{\mathbb{N}}
\def\nudge{2}
\def\norm{}
\def\OO{\mathscr{O}}
\def\PPP{\mathscr{P}}
\def\PP{\mathsf{P}}
\def\P{\mathsf{Pr}}
\def\Pr{\mathsf{Pr}}
\def\QQ{\mathsf{Q}}
\def\Q{\mathbb{Q}}
\def\RR{\mathbb{R}}
\def\SD{\mathsf{SD}}
\def\spcer{\ }
\def\TCE{\mathsf{TCE}}
\def\TVaR{\mathsf{TVaR}}
\def\Var{\mathsf{Var}}
\def\var{\mathsf{var}}
\def\VaR{\mathsf{VaR}}
\def\WCE{\mathsf{WCE}}
\def\ww{\mathbf{w}}
\def\XXX{\mathcal{X}}
\def\xx{\mathbf{x}}
\def\XX{\mathbf{X}}
\def\yy{\mathbf{y}}
\def\ZZZ{\mathcal{Z}}
\def\ZZ{\mathbb{Z}}"""

    @staticmethod
    def process_tex_macros(text):
        r"""
        Expand standard general.tex macros in the text.

        Each occurrence of a macro name from ``_macros`` is replaced by its
        body. A name is only replaced when it is a complete control word,
        so ``\E[X]`` expands while ``\Epsilon`` is left untouched.

        :param text: string containing TeX.
        :return: ``text`` with the known macros expanded.
        """
        m, regex = TeXMacros.tex_to_dict(TeXMacros._macros.strip())
        # A callable replacement is inserted literally, so backslashes in
        # the macro bodies are not re-interpreted as regex escapes.
        return re.sub(regex, lambda match: m[match[0]], text)

    @staticmethod
    def tex_to_dict(text):
        r"""
        Convert text, a series of ``\def\name{body}`` macros (one per
        line), into a dictionary.

        Blank lines are ignored. When a name is defined more than once the
        last definition wins.

        :param text: newline separated macro definitions.
        :return: ``(m, regex)`` where ``m`` maps macro name (including the
            leading backslash) to its body and ``regex`` is an alternation
            pattern matching any of the names.
        :raises ValueError: if a non-blank line is not a braced definition.
        """
        m = dict(
            TeXMacros.tex_splitter(line)
            for line in text.split('\n')
            if line.strip()
        )
        # Longest names first, and a name ending in a letter must not be
        # followed by another letter; otherwise \A would fire inside \AVaR
        # and \P inside \Pi.
        names = sorted(m, key=len, reverse=True)
        regex = '|'.join(
            re.escape(name)
            + (r'(?![A-Za-z])'
               if name[-1:].isascii() and name[-1:].isalpha() else '')
            for name in names
        )
        return m, regex

    @staticmethod
    def tex_splitter(x):
        r"""
        Split a single ``\def`` style TeX macro into name and body.

        :param x: one definition, e.g. ``\def\RR{\mathbb{R}}``; surrounding
            whitespace is ignored.
        :return: ``(name, body)``, e.g. ``('\\RR', '\\mathbb{R}')``.
        :raises ValueError: if ``x`` has no ``{...}`` body.
        """
        x = x.strip()
        # Drop only the leading \def so a \def inside the body survives.
        if x.startswith('\\def'):
            x = x[len('\\def'):]
        start = x.find('{')
        stop = x.rfind('}')
        if start < 0 or stop < start:
            raise ValueError(f'not a \\def style macro: {x!r}')
        return x[:start].strip(), x[start + 1:stop]
def find_tex_snippets(in_dir='\\S\\TELOS\\PIR\\docs', out_file='tex_list.csv'):
    """
    Ripgrep / TeX macro expand list of TeX snippets.

    Runs ``rg`` over the ``*.md`` and ``*.qmd`` files under ``in_dir``
    looking for inline ``$...$`` spans, expands the macros known to
    ``TeXMacros`` and returns the distinct snippets. Snippets containing
    ``$$``, ``lcroof``, ``#`` or a double backslash are dropped, as are
    empty lines.

    :param in_dir: directory passed to ``rg`` as the search root.
    :param out_file: file name, relative to this module's directory, the
        result is written to as CSV; pass ``''`` to skip writing.
    :return: DataFrame with a single ``expr`` column, sorted.
    :raises subprocess.CalledProcessError: if ``rg`` exits with a status
        other than 0 (matches found) or 1 (no matches).
    """
    # prod run with \\s\\telos\\ (!)
    in_dir = str(Path(in_dir))
    cmd = ['rg', '-N', '-o', '--no-filename', '-g', '*.md', '-g', '*.qmd',
           r'\$.+?\$', in_dir]
    result = subprocess.run(
        cmd, capture_output=True, text=True, check=False, encoding='utf-8'
    )
    # rg exits with 1 when nothing matched; that is an empty result, not
    # a failure. Any other non-zero status is still raised.
    if result.returncode not in (0, 1):
        result.check_returncode()

    txt = TeXMacros.process_tex_macros(result.stdout)

    excluded = ('$$', 'lcroof', '#', r'\\')
    # Sorted so the returned frame and the CSV are stable between runs;
    # iterating a bare set of strings is not.
    snippets = sorted(
        s for s in set(txt.split('\n'))
        if s and not any(token in s for token in excluded)
    )

    df = pd.DataFrame({'expr': snippets})
    print(f'Found {len(df)} snippets!')
    if out_file:
        p = Path(__file__).parent / out_file
        print(p)
        df.to_csv(p, encoding='utf-8')
    return df