From 0588e40d8c073ed784712f898f1c71f1dcbb9997 Mon Sep 17 00:00:00 2001 From: NT Date: Tue, 17 Aug 2021 21:11:13 +0200 Subject: [PATCH] updated cleanup and unicode scripts --- fixup-latex.py | 55 ++++++++++++++++++++++++++++++----------- json-cleanup-for-pdf.py | 38 +++++++++++++++------------- make-pdf.sh | 6 ++--- 3 files changed, 65 insertions(+), 34 deletions(-) diff --git a/fixup-latex.py b/fixup-latex.py index d961c44..0ef66cc 100644 --- a/fixup-latex.py +++ b/fixup-latex.py @@ -2,13 +2,15 @@ import sys, os, re # fix jupyter book latex output -#filter_mem = re.compile(r".+\|\s+(\d+)MiB\s+/\s+(\d+)MiB\s+\|")#') -#ft2 = re.compile(r"tst") -#ft3 = re.compile(r"’") -#fte = re.compile(r"👋") +# TODOs +# - check, remove full "name": "stderr" {} block? (grep stderr *ipynb) ??? +# or whole warning/err empty blocks... +# - replace phi symbol w text in phiflow + +# older tests +#ft1 = re.compile(r"’") +#ft2 = re.compile(r"👋") -# TODO check, remove full "name": "stderr" {} block? (grep stderr *ipynb) ??? -# TODO , replace phi symbol w text in phiflow inf = "book-in.tex" outf = "book-in2.tex" @@ -26,12 +28,12 @@ reSkip.append( re.compile(r"ucharclasses") ) ; reSCnt.append( 1 ) reSkip.append( re.compile(r"unicode-math") ) ; reSCnt.append( 1 ) # latex fixup, remove references chapter -reSkip.append( re.compile(r"chapter.References" ) ) -reSkip.append( re.compile(r"detokenize.references.references" ) ) +reSkip.append( re.compile(r"chapter.References" ) ); reSCnt.append( 1 ) +reSkip.append( re.compile(r"detokenize.references.references" ) ); reSCnt.append( 1 ) -#reSkip.append( re.compile(r"") ) -#reSkip.append( re.compile(r"") ) -#reSkip.append( re.compile(r"") ) +#reSkip.append( re.compile(r"") ); reSCnt.append( 1 ) +#reSkip.append( re.compile(r"") ); reSCnt.append( 1 ) +#reSkip.append( re.compile(r"") ); reSCnt.append( 1 ) # ugly, manually fix citations in captions one by one recs = []; rect = [] @@ -44,6 +46,20 @@ rect.append( "parametrized GAN {[}\\\\protect\\\\hyperlink{cite.references:id2}{ recs.append( re.compile(r"approach using continuous convolutions {\[}.hyperlink{cite.references:id12}{UPTK19}{\]}" ) ) rect.append( "approach using continuous convolutions {[}\\\\protect\\\\hyperlink{cite.references:id12}{UPTK19}{]}" ) +# fixup unicode symbols + +recs.append( re.compile(r"’" ) ) # unicode ' +rect.append( "\'" ) + +recs.append( re.compile(r"Φ") ) # phiflow , ... differentiable simulation framework ... +rect.append( "$\\\\phi$" ) + +recs.append( re.compile(r"“") ) # "..." +rect.append( "\'\'" ) + +recs.append( re.compile(r"”") ) +rect.append( "\'\'" ) + # fixup title , cumbersome... # fix backslashes... saves at least typing a few of them! still needs manual \ -> \\ , could be done better @@ -74,6 +90,9 @@ rect.append( 'author{}' ) recs.append( re.compile(r"date{(.*)}") ) rect.append( r'date{\\centering{\1}}' ) +#print(len(rect)) +#print(len(recs)) +#exit(1) # --- @@ -81,6 +100,11 @@ rect.append( r'date{\\centering{\1}}' ) # only do replacements via recs for book.tex , via applyRecs=True def parseF(inf,outf,reSkip,reSCnt,applyRecs=False): print("Fixup, "+inf+" -> "+outf+" ") + + if len(reSkip) != len(reSCnt): # sanity check + print("Error for "+inf+" reSkip cnt: " + format([ len(reSkip), len(reSCnt) ]) ) + exit(1) + with open(outf, 'w') as fout: with open(inf, 'r') as f: c = 0 @@ -89,6 +113,7 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False): for line in iter(f.readline, ''): # skip lines? + rSkip = -1 if skip==0: for r in range(len(reSkip)): t = reSkip[r].search(str(line)) @@ -96,14 +121,16 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False): #print(format(c)+" skip due to '" + format(t) +"', RE #"+format(r)+" , skip "+format(reSCnt[r]) ) # debug skip = reSCnt[r] skipTot += reSCnt[r] + rSkip = r if skip>0: skip = skip-1 - fout.write("% SKIP due to RE #"+format(r)+" , L"+format(reSCnt[r]) +" "+line) + fout.write("% SKIP due to RE #"+format(rSkip)+" , L"+format(reSCnt[rSkip]) +" "+line) #print("S "+line[:-1]) # debug else: if applyRecs: # fix captions and apply other latex replacements + #print(len(rect)); print(len(recs)) for i in range(len(recs)): line = recs[i].sub( rect[i], line ) # replace all @@ -126,7 +153,7 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False): parseF(inf,outf,reSkip,reSCnt,applyRecs=True) -#exit(1); print("debug exit!"); exit(1) +# print("debug exit!"); exit(1) #--- @@ -158,7 +185,7 @@ parseF(inf,outf,reSkip,reSCnt) #--- -# disable for now? +# disable for now? keep openRight if 0: inf = "sphinxmanual-in.cls" outf = "sphinxmanual.cls" diff --git a/json-cleanup-for-pdf.py b/json-cleanup-for-pdf.py index 597a2e8..d0cb59c 100644 --- a/json-cleanup-for-pdf.py +++ b/json-cleanup-for-pdf.py @@ -1,6 +1,6 @@ import sys, json, re, os # usage: json-cleanup-for-pdf.py -# if int>0, disable PDF mode (only do WWW cleanup) +# if int>0, disable PDF mode (only do WWW cleanup, note metadata.name still needs to be cleaned up manually) # disableWrites = True # debugging @@ -13,12 +13,12 @@ if len(sys.argv)>1: pdfMode = False fileList = [ - "diffphys-code-burgers.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF + "diffphys-code-burgers.ipynb", "diffphys-code-ns.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF "bayesian-code.ipynb", "supervised-airfoils.ipynb" # pytorch ] #fileList = [ "diffphys-code-burgers.ipynb"] # debug -#fileList = [ "diffphys-code-sol.ipynb"] # debug +#fileList = [ "diffphys-code-ns.ipynb"] # debug # main @@ -45,16 +45,18 @@ for fnOut in fileList: #print(d.keys()) #print(d["cells"][0].keys()) - # remove TF / pytorch warnings - re1 = re.compile(r"WARNING:tensorflow:") - re2 = re.compile(r"UserWarning:") - re4 = re.compile(r"DeprecationWarning:") - re5 = re.compile(r"InsecureRequestWarning:") # for https download + # remove TF / pytorch warnings, build list of regular expressions to search for + res = [] + res.append( re.compile(r"WARNING:tensorflow:") ) + res.append( re.compile(r"UserWarning:") ) + res.append( re.compile(r"DeprecationWarning:") ) + res.append( re.compile(r"InsecureRequestWarning") ) # for https download + res.append( re.compile(r"Building wheel") ) # phiflow install, also gives weird unicode characters # remove all "warnings.warn" from phiflow? # shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785" - re3 = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" ) - re3t = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]" + reD = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" ) + reDt = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]" t="cells" okay = 0 @@ -75,7 +77,7 @@ for fnOut in fileList: for j in range(len( d[t][i]["source"] )): #print( d[t][i]["source"][j] ) #print( type(d[t][i]["source"][j] )) - dsOut = re3.sub( re3t, d[t][i]["source"][j] ) # replace long number string (only for burgers) + dsOut = reD.sub( reDt, d[t][i]["source"][j] ) # replace long number string (only for burgers) d[t][i]["source"][j] = dsOut deletes = deletes+1 #print( d[t][i]["source"][j] +"\n >>> \n" +d2 ) @@ -91,12 +93,14 @@ for fnOut in fileList: dell = [] # collect entries to delete for k in range( len( d[t][i]["outputs"][j]["text"] ) ): - nums = [] - nums.append( re1.search( d[t][i]["outputs"][j]["text"][k] ) ) - nums.append( re2.search( d[t][i]["outputs"][j]["text"][k] ) ) - nums.append( re4.search( d[t][i]["outputs"][j]["text"][k] ) ) - nums.append( re5.search( d[t][i]["outputs"][j]["text"][k] ) ) - if (nums[0] is None) and (nums[1] is None): + #print(" tout "+ d[t][i]["outputs"][j]["text"][k] ) # debug , print all lines + nums = []; all_good = True + for rr in range(len(res)): + nums.append( res[rr].search( d[t][i]["outputs"][j]["text"][k] ) ) + if nums[-1] is not None: + all_good = False # skip! + + if all_good: okay = okay+1 else: # delete line "dell" deletes = deletes+1 diff --git a/make-pdf.sh b/make-pdf.sh index c5bec9e..e755bbf 100755 --- a/make-pdf.sh +++ b/make-pdf.sh @@ -5,7 +5,7 @@ echo WARNING - still requires one manual quit of first pdf/latex pass, use shift echo # do clean git checkout for changes from json-cleanup-for-pdf.py? -# git checkout diffphys-code-burgers.ipynb diffphys-code-sol.ipynb physicalloss-code.ipynb bayesian-code.ipynb supervised-airfoils.ipynb +# git checkout diffphys-code-burgers.ipynb diffphys-code-ns.ipynb diffphys-code-sol.ipynb physicalloss-code.ipynb bayesian-code.ipynb supervised-airfoils.ipynb # warning - modifies notebooks! python3.7 json-cleanup-for-pdf.py @@ -28,7 +28,7 @@ mv sphinxmanual.cls sphinxmanual-in.cls python3.7 ../../fixup-latex.py # generates book-in2.tex -# remove unicode chars +# remove unicode chars via unix iconv iconv -c -f utf-8 -t ascii book-in2.tex > book.tex # finally run pdflatex, now it should work: @@ -36,8 +36,8 @@ iconv -c -f utf-8 -t ascii book-in2.tex > book.tex pdflatex book pdflatex book +# for convenience, archive results in main dir mv book.pdf ../../book-pdflatex.pdf - tar czvf ../../pbdl-latex-for-arxiv.tar.gz *