From 2846443669725c1238ab59609d87442b672540b3 Mon Sep 17 00:00:00 2001 From: NT Date: Tue, 29 Jun 2021 18:02:09 +0200 Subject: [PATCH] pdf export helpers --- fixup-latex.py | 92 +++++++++++++++++++++++++++++++---------- json-cleanup-for-pdf.py | 51 +++++++++++++++++++++++ make-pdf.sh | 46 ++++++++++++++++++++- 3 files changed, 166 insertions(+), 23 deletions(-) create mode 100644 json-cleanup-for-pdf.py diff --git a/fixup-latex.py b/fixup-latex.py index cfce919..3cb8c06 100644 --- a/fixup-latex.py +++ b/fixup-latex.py @@ -5,9 +5,9 @@ import sys, os, re # fix jupyter book latex output #filter_mem = re.compile(r".+\|\s+(\d+)MiB\s+/\s+(\d+)MiB\s+\|")#') -ft2 = re.compile(r"tst") -ft3 = re.compile(r"’") -fte = re.compile(r"👋") +#ft2 = re.compile(r"tst") +#ft3 = re.compile(r"’") +#fte = re.compile(r"👋") # notebooks, parse and remove lines with WARNING:tensorflow + next? ; remove full "name": "stderr" {} block? (grep stderr *ipynb) @@ -21,27 +21,75 @@ fte = re.compile(r"👋") # u = np.asarray( [0.008612174447657694, 0.02584669669548606, ... ] ) -path = "tmp2.txt" # simple -path = "tmp.txt" # utf8 -#path = "book.tex-in.bak" # full utf8 -outf = "tmpOut.txt" +inf = "book-in.tex" +outf = "book-in2.tex" +print("Start fixup latex, "+inf+" -> "+outf+" \n\n") -with open(outf, 'w') as fout: - with open(path, 'r') as f: - c = 0 - for line in iter(f.readline, ''): - line = re.sub('’', '\'', str(line)) - line = re.sub('[abz]', '.', str(line)) +reSkip = [] ; reSCnt = [] +reSkip.append( re.compile(r"catcode") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"sepackage{fontspec}") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"defaultfontfeatures") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"polyglossia") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"setmainlanguage{english}") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"addto.captionsenglish") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"set....font{Free") ) ; reSCnt.append( 7 ) +reSkip.append( re.compile(r"ucharclasses") ) ; reSCnt.append( 1 ) +reSkip.append( re.compile(r"unicode-math") ) ; reSCnt.append( 1 ) - t = ft3.search(str(line)) - if t is not None: - print("H " + format(t) +" "+ format(t.group(0)) ) +#reSkip.append( re.compile(r"") ) +#reSkip.append( re.compile(r"") ) +#reSkip.append( re.compile(r"") ) +#reSkip.append( re.compile(r"") ) - t = fte.search(str(line)) - if t is not None: - print("E " + format(t) + format(t.group(0)) ) +# reSkip.append( re.compile(r"") ) - fout.write(line) - print(line[:-1]) - c = c+1 +def parseF(inf,outf,reSkip,reSCnt): + print("Fixup, "+inf+" -> "+outf+" ") + with open(outf, 'w') as fout: + with open(inf, 'r') as f: + c = 0 + skip = 0 + skipTot = 0 + for line in iter(f.readline, ''): + + # skip lines? + if skip==0: + for r in range(len(reSkip)): + t = reSkip[r].search(str(line)) + if t is not None: + #print(format(c)+" skip due to '" + format(t) +"', RE #"+format(r)+" , skip "+format(reSCnt[r]) ) # debug + skip = reSCnt[r] + skipTot += reSCnt[r] + + if skip>0: + skip = skip-1 + fout.write("% SKIP due to RE #"+format(r)+" , L"+format(reSCnt[r]) +" "+line) + #print("S "+line[:-1]) # debug + else: + fout.write(line) + #print(line[:-1]) # debug + + c = c+1 + + # line = re.sub('’', '\'', str(line)) + # line = re.sub('[abz]', '.', str(line)) + + # t = ft3.search(str(line)) + # if t is not None: + # print("H " + format(t) +" "+ format(t.group(0)) ) + + # t = fte.search(str(line)) + # if t is not None: + # print("E " + format(t) + format(t.group(0)) ) + print("Fixup -> "+outf+" done, skips: "+format(skipTot) +" \n") + +parseF(inf,outf,reSkip,reSCnt) + +inf = "sphinxmessages-in.sty" +outf = "sphinxmessages.sty" + +reSkip = [] ; reSCnt = [] +reSkip.append( re.compile(r"addto.captionsenglish") ) ; reSCnt.append( 1 ) + +parseF(inf,outf,reSkip,reSCnt) diff --git a/json-cleanup-for-pdf.py b/json-cleanup-for-pdf.py new file mode 100644 index 0000000..6790874 --- /dev/null +++ b/json-cleanup-for-pdf.py @@ -0,0 +1,51 @@ +import json, re + +fn="diffphys-code-burgers.ipynb" +fnOut="diffphys-code-burgers-r.ipynb" + +with open(fn) as file: + d = json.load(file) + +print(d.keys()) +#print(d["cells"][0].keys()) + +re1 = re.compile(r"WARNING:tensorflow:") + +t="cells" +for i in range(len(d[t])): + #for i in range(len(d[t])): + #print(d[t][0]["cell_type"]) + #print(d[t][i]["cell_type"]) + + # remove images after code + + if d[t][i]["cell_type"]=="code": + #print(d[t][i].keys()) + #d[t][i]["outputs"] = "" + #print(d[t][i]["outputs"]) + + #print(len( d[t][i]["outputs"] )) + for j in range(len( d[t][i]["outputs"] )): + #print(type( d[t][i]["outputs"][j] )) + #print( d[t][i]["outputs"][j].keys() ) + + # images + if d[t][i]["outputs"][j]["output_type"]=="stream": + print( len( d[t][i]["outputs"][j]["text"] ) ) + + dell = [] + for k in range( len( d[t][i]["outputs"][j]["text"] ) ): + num = re1.search( d[t][i]["outputs"][j]["text"][k] ) + if num is not None: + dell.append(d[t][i]["outputs"][j]["text"][k]) + print( format(num) +" " + d[t][i]["outputs"][j]["text"][k] ) # len( d[t][i]["outputs"][j]["text"][k] ) ) + for dl in dell: + d[t][i]["outputs"][j]["text"].remove(dl) + + print( format( len( d[t][i]["outputs"][j]["text"] )) + " A") + +#print(d["cells"]) + +with open(fnOut,'w') as fileOut: + json.dump(d,fileOut, indent=1, sort_keys=True) + diff --git a/make-pdf.sh b/make-pdf.sh index 2aba307..dee8d42 100755 --- a/make-pdf.sh +++ b/make-pdf.sh @@ -1,11 +1,55 @@ # source this file with "." in a shell +#DIR=/Users/thuerey/Dropbox/mbaDevelSelected/pbdl-book/ +DIR=/Users/thuerey/Dropbox/mbaDevelSelected/pbdl-book-cleanCheckout/ +cd ${DIR} + +# warning - modifies notebooks! +python3.7 json-cleanup-for-pdf.py + +exit + + + + +# GEN! +/Users/thuerey/Library/Python/3.7/bin/jupyter-book build . --builder pdflatex + + +cd _build/latex + +rm -f book-in.tex sphinxmessages-in.sty +mv book.tex book-in.tex +mv sphinxmessages.sty sphinxmessages-in.sty + +python3.7 ../../fixup-latex.py +# generates book-in2.tex + +# remove unicode chars +iconv -c -f utf-8 -t ascii book-in2.tex > book.tex + +exit + + + + + + + + + + + + +# OLD VERSION + echo echo Note: first comment out PG chapter in _toc echo Note: manually quit first latex pass with shift-x echo -DIR=/Users/thuerey/Dropbox/mbaDevelSelected/pbdl-book/ +#DIR=/Users/thuerey/Dropbox/mbaDevelSelected/pbdl-book/ +DIR=/Users/thuerey/Dropbox/mbaDevelSelected/pbdl-book-cleanCheckout/ cd ${DIR} #echo Note: make sure to copy latex helpers! cp ./latex-helpers/* ./_build/latex/