# Source code for greater_tables.data.tex_list

"""
Find and process blobs of TeX.

Change target directory to find other blobs.
"""

from pathlib import Path
import re
import subprocess

import pandas as pd


class TeXMacros:
    """
    Expand a fixed set of ``\\def`` style TeX macros in text.

    made out of PublisherBase in blog_tools.py
    from great2.blog

    All methods are static; the macro definitions live in ``_macros``,
    one ``\\def\\name{body}`` per line.
    """

    _macros = r"""\def\AA{\mathcal{A}}
\def\atan{\mathrm{atan}}
\def\A{\mathcal{A}}
\def\B{\mathcal{B}}
\def\BB{\mathbb{B}}
\def\AVaR{\mathsf{AVaR}}
\def\bbeta{\mathbf{\beta}}
\def\bb{\mathbf b}
\def\bfx{\mathbf x}
\def\bm{\mathbf }
\def\biTVaR{\mathsf{biTVaR}}
\def\corr{\mathsf{Corr}}
\def\cov{\mathsf{cov}}
\def\cp{\mathsf{CP}}
\def\CTE{\mathsf{CTE}}
\def\CVaR{\mathsf{CVaR}}
\def\dint{\displaystyle\int}
\def\dsum{\displaystyle\sum}
\def\ecirc{\accentset{\circ} e}
\def\ecirc{\accentset{\circ} e}
\def\EPD{\mathsf{EPD}}
\def\ES{\mathsf{ES}}
\def\esssup{\mathrm{ess\,sup}}
\def\E{\mathsf{E}}
\def\F{\mathscr{F}}
\def\FFF{\mathscr{F}}
\def\FF{\mathcal{F}}
\def\G{\mathscr{G}}
\def\HH{\mathbf{H}}
\def\kpx{{{}_kp_x}}
\def\MM{\mathcal{M}}
\def\NN{\mathbb{N}}
\def\nudge{2}
\def\norm{}
\def\OO{\mathscr{O}}
\def\PPP{\mathscr{P}}
\def\PP{\mathsf{P}}
\def\P{\mathsf{Pr}}
\def\Pr{\mathsf{Pr}}
\def\QQ{\mathsf{Q}}
\def\Q{\mathbb{Q}}
\def\RR{\mathbb{R}}
\def\SD{\mathsf{SD}}
\def\spcer{\ }
\def\TCE{\mathsf{TCE}}
\def\TVaR{\mathsf{TVaR}}
\def\Var{\mathsf{Var}}
\def\var{\mathsf{var}}
\def\VaR{\mathsf{VaR}}
\def\WCE{\mathsf{WCE}}
\def\ww{\mathbf{w}}
\def\XXX{\mathcal{X}}
\def\xx{\mathbf{x}}
\def\XX{\mathbf{X}}
\def\yy{\mathbf{y}}
\def\ZZZ{\mathcal{Z}}
\def\ZZ{\mathbb{Z}}"""

    @staticmethod
    def process_tex_macros(text):
        """
        Expand the macros defined in ``_macros`` wherever they occur in text.

        A macro only matches as a complete control word: the name must not
        be followed by another ASCII letter. Without that guard ``\\P``
        would rewrite the start of ``\\Pr`` or ``\\Phi`` and ``\\var`` would
        corrupt ``\\varepsilon``.

        :param text: string that may contain macro names.
        :return: text with every recognised macro replaced by its body.
        """
        m, regex = TeXMacros.tex_to_dict(TeXMacros._macros)
        # The negative lookahead emulates TeX tokenisation, where a control
        # word extends over all consecutive letters.
        pattern = re.compile(f'(?:{regex})(?![A-Za-z])')
        # A function replacement is used so backslashes in the macro body
        # are inserted literally rather than read as group references.
        return pattern.sub(lambda match: m[match[0]], text)

    @staticmethod
    def tex_to_dict(text):
        """
        Convert text, a series of def{} macros, into a dictionary.

        Blank lines are ignored. When a name is defined more than once the
        last definition wins.

        :param text: newline separated ``\\def\\name{body}`` definitions.
        :return: tuple ``(m, regex)`` where m maps macro name to body and
            regex is an alternation of the escaped names, longest first so a
            short name can never shadow a longer one that starts with it.
        """
        # An empty line would otherwise become an empty key and put an empty
        # alternative into the regex.
        lines = [line for line in text.split('\n') if line.strip()]
        m = dict(TeXMacros.tex_splitter(line) for line in lines)
        ordered = sorted(m, key=len, reverse=True)
        regex = '|'.join(re.escape(k) for k in ordered)
        return m, regex

    @staticmethod
    def tex_splitter(x):
        """
        Split a single def style tex macro into its name and body.

        :param x: one definition, e.g. ``\\def\\RR{\\mathbb{R}}``.
        :return: tuple ``(name, body)``, e.g. ``('\\RR', '\\mathbb{R}')``.
        :raises ValueError: if x has no ``{...}`` body.
        """
        x = x.strip()
        # Drop only the leading \def; the body may legitimately contain the
        # same characters.
        if x.startswith('\\def'):
            x = x[len('\\def'):]
        start = x.find('{')
        end = x.rfind('}')
        if start < 0 or end < start:
            raise ValueError(f'Not a \\def style macro: {x!r}')
        # The body runs from the first opening brace to the last closing
        # brace, so nested braces are kept intact.
        return x[:start].strip(), x[start + 1:end]


def find_tex_snippets(in_dir='\\S\\TELOS\\PIR\\docs',
                      out_file='tex_list.csv'):
    """
    Ripgrep / TeX macro expand list of TeX snippets.

    Runs ``rg`` over the ``*.md`` and ``*.qmd`` files under in_dir to pull
    out every ``$...$`` span, expands macros with ``TeXMacros``, removes
    duplicates and unwanted snippets, and optionally saves the result.

    :param in_dir: directory to search.
    :param out_file: CSV file name, written next to this module; pass ``''``
        (or ``None``) to skip writing.
    :return: DataFrame with one column, ``expr``, of unique snippets in
        sorted order.
    :raises subprocess.CalledProcessError: if ``rg`` fails. Exit status 1
        (no matches) is not a failure and gives an empty DataFrame.
    """
    # prod run with \\s\\telos\\ (!)
    in_dir = str(Path(in_dir))
    cmd = ['rg', '-N', '-o', '--no-filename',
           '-g', '*.md',
           '-g', '*.qmd',
           r'\$.+?\$',
           in_dir]
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        check=False,
        encoding='utf-8'
    )
    # ripgrep exits with 1 when it simply finds nothing; only other non-zero
    # statuses indicate a real error.
    if result.returncode not in (0, 1):
        raise subprocess.CalledProcessError(
            result.returncode, cmd,
            output=result.stdout, stderr=result.stderr)
    txt = TeXMacros.process_tex_macros(result.stdout)
    # Snippets containing any of these markers are dropped: display math,
    # the lcroof macro, '#', and TeX line breaks.
    excluded = ('$$', 'lcroof', '#', r'\\')
    # Sorting makes the row order (and so the CSV) reproducible across runs.
    stext = sorted({
        snippet for snippet in txt.split('\n')
        if snippet and not any(marker in snippet for marker in excluded)
    })
    df = pd.DataFrame({'expr': stext})
    print(f'Found {len(df)} snippets!')
    if out_file:
        p = Path(__file__).parent / out_file
        print(p)
        df.to_csv(p, encoding='utf-8')
    return df