updated cleanup and unicode scripts
This commit is contained in:
parent
e731e11393
commit
0588e40d8c
@ -2,13 +2,15 @@ import sys, os, re
|
|||||||
|
|
||||||
# fix jupyter book latex output
|
# fix jupyter book latex output
|
||||||
|
|
||||||
#filter_mem = re.compile(r".+\|\s+(\d+)MiB\s+/\s+(\d+)MiB\s+\|")#')
|
# TODOs
|
||||||
#ft2 = re.compile(r"tst")
|
# - check: remove the full "name": "stderr" {} block? (grep stderr *ipynb)
|
||||||
#ft3 = re.compile(r"’")
|
# or whole warning/err empty blocks...
|
||||||
#fte = re.compile(r"👋")
|
# - replace the phi symbol with text in phiflow
|
||||||
|
|
||||||
|
# older tests
|
||||||
|
#ft1 = re.compile(r"’")
|
||||||
|
#ft2 = re.compile(r"👋")
|
||||||
|
|
||||||
# TODO check, remove full "name": "stderr" {} block? (grep stderr *ipynb) ???
|
|
||||||
# TODO , replace phi symbol w text in phiflow
|
|
||||||
|
|
||||||
inf = "book-in.tex"
|
inf = "book-in.tex"
|
||||||
outf = "book-in2.tex"
|
outf = "book-in2.tex"
|
||||||
@ -26,12 +28,12 @@ reSkip.append( re.compile(r"ucharclasses") ) ; reSCnt.append( 1 )
|
|||||||
reSkip.append( re.compile(r"unicode-math") ) ; reSCnt.append( 1 )
|
reSkip.append( re.compile(r"unicode-math") ) ; reSCnt.append( 1 )
|
||||||
|
|
||||||
# latex fixup, remove references chapter
|
# latex fixup, remove references chapter
|
||||||
reSkip.append( re.compile(r"chapter.References" ) )
|
reSkip.append( re.compile(r"chapter.References" ) ); reSCnt.append( 1 )
|
||||||
reSkip.append( re.compile(r"detokenize.references.references" ) )
|
reSkip.append( re.compile(r"detokenize.references.references" ) ); reSCnt.append( 1 )
|
||||||
|
|
||||||
#reSkip.append( re.compile(r"") )
|
#reSkip.append( re.compile(r"") ); reSCnt.append( 1 )
|
||||||
#reSkip.append( re.compile(r"") )
|
#reSkip.append( re.compile(r"") ); reSCnt.append( 1 )
|
||||||
#reSkip.append( re.compile(r"") )
|
#reSkip.append( re.compile(r"") ); reSCnt.append( 1 )
|
||||||
|
|
||||||
# ugly, manually fix citations in captions one by one
|
# ugly, manually fix citations in captions one by one
|
||||||
recs = []; rect = []
|
recs = []; rect = []
|
||||||
@ -44,6 +46,20 @@ rect.append( "parametrized GAN {[}\\\\protect\\\\hyperlink{cite.references:id2}{
|
|||||||
recs.append( re.compile(r"approach using continuous convolutions {\[}.hyperlink{cite.references:id12}{UPTK19}{\]}" ) )
|
recs.append( re.compile(r"approach using continuous convolutions {\[}.hyperlink{cite.references:id12}{UPTK19}{\]}" ) )
|
||||||
rect.append( "approach using continuous convolutions {[}\\\\protect\\\\hyperlink{cite.references:id12}{UPTK19}{]}" )
|
rect.append( "approach using continuous convolutions {[}\\\\protect\\\\hyperlink{cite.references:id12}{UPTK19}{]}" )
|
||||||
|
|
||||||
|
# fixup unicode symbols
|
||||||
|
|
||||||
|
recs.append( re.compile(r"’" ) ) # unicode '
|
||||||
|
rect.append( "\'" )
|
||||||
|
|
||||||
|
recs.append( re.compile(r"Φ") ) # phiflow , ... differentiable simulation framework ...
|
||||||
|
rect.append( "$\\\\phi$" )
|
||||||
|
|
||||||
|
recs.append( re.compile(r"“") ) # "..."
|
||||||
|
rect.append( "\'\'" )
|
||||||
|
|
||||||
|
recs.append( re.compile(r"”") )
|
||||||
|
rect.append( "\'\'" )
|
||||||
|
|
||||||
# fixup title , cumbersome...
|
# fixup title , cumbersome...
|
||||||
|
|
||||||
# fix backslashes... saves at least typing a few of them! still needs manual \ -> \\ , could be done better
|
# fix backslashes... saves at least typing a few of them! still needs manual \ -> \\ , could be done better
|
||||||
@ -74,6 +90,9 @@ rect.append( 'author{}' )
|
|||||||
recs.append( re.compile(r"date{(.*)}") )
|
recs.append( re.compile(r"date{(.*)}") )
|
||||||
rect.append( r'date{\\centering{\1}}' )
|
rect.append( r'date{\\centering{\1}}' )
|
||||||
|
|
||||||
|
#print(len(rect))
|
||||||
|
#print(len(recs))
|
||||||
|
#exit(1)
|
||||||
|
|
||||||
|
|
||||||
# ---
|
# ---
|
||||||
@ -81,6 +100,11 @@ rect.append( r'date{\\centering{\1}}' )
|
|||||||
# only do replacements via recs for book.tex , via applyRecs=True
|
# only do replacements via recs for book.tex , via applyRecs=True
|
||||||
def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
|
def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
|
||||||
print("Fixup, "+inf+" -> "+outf+" ")
|
print("Fixup, "+inf+" -> "+outf+" ")
|
||||||
|
|
||||||
|
if len(reSkip) != len(reSCnt): # sanity check
|
||||||
|
print("Error for "+inf+" reSkip cnt: " + format([ len(reSkip), len(reSCnt) ]) )
|
||||||
|
exit(1)
|
||||||
|
|
||||||
with open(outf, 'w') as fout:
|
with open(outf, 'w') as fout:
|
||||||
with open(inf, 'r') as f:
|
with open(inf, 'r') as f:
|
||||||
c = 0
|
c = 0
|
||||||
@ -89,6 +113,7 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
|
|||||||
for line in iter(f.readline, ''):
|
for line in iter(f.readline, ''):
|
||||||
|
|
||||||
# skip lines?
|
# skip lines?
|
||||||
|
rSkip = -1
|
||||||
if skip==0:
|
if skip==0:
|
||||||
for r in range(len(reSkip)):
|
for r in range(len(reSkip)):
|
||||||
t = reSkip[r].search(str(line))
|
t = reSkip[r].search(str(line))
|
||||||
@ -96,14 +121,16 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
|
|||||||
#print(format(c)+" skip due to '" + format(t) +"', RE #"+format(r)+" , skip "+format(reSCnt[r]) ) # debug
|
#print(format(c)+" skip due to '" + format(t) +"', RE #"+format(r)+" , skip "+format(reSCnt[r]) ) # debug
|
||||||
skip = reSCnt[r]
|
skip = reSCnt[r]
|
||||||
skipTot += reSCnt[r]
|
skipTot += reSCnt[r]
|
||||||
|
rSkip = r
|
||||||
|
|
||||||
if skip>0:
|
if skip>0:
|
||||||
skip = skip-1
|
skip = skip-1
|
||||||
fout.write("% SKIP due to RE #"+format(r)+" , L"+format(reSCnt[r]) +" "+line)
|
fout.write("% SKIP due to RE #"+format(rSkip)+" , L"+format(reSCnt[rSkip]) +" "+line)
|
||||||
#print("S "+line[:-1]) # debug
|
#print("S "+line[:-1]) # debug
|
||||||
else:
|
else:
|
||||||
if applyRecs:
|
if applyRecs:
|
||||||
# fix captions and apply other latex replacements
|
# fix captions and apply other latex replacements
|
||||||
|
#print(len(rect)); print(len(recs))
|
||||||
for i in range(len(recs)):
|
for i in range(len(recs)):
|
||||||
line = recs[i].sub( rect[i], line ) # replace all
|
line = recs[i].sub( rect[i], line ) # replace all
|
||||||
|
|
||||||
@ -126,7 +153,7 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
|
|||||||
|
|
||||||
parseF(inf,outf,reSkip,reSCnt,applyRecs=True)
|
parseF(inf,outf,reSkip,reSCnt,applyRecs=True)
|
||||||
|
|
||||||
#exit(1); print("debug exit!"); exit(1)
|
# print("debug exit!"); exit(1)
|
||||||
|
|
||||||
#---
|
#---
|
||||||
|
|
||||||
@ -158,7 +185,7 @@ parseF(inf,outf,reSkip,reSCnt)
|
|||||||
|
|
||||||
#---
|
#---
|
||||||
|
|
||||||
# disable for now?
|
# disable for now? keep openRight
|
||||||
if 0:
|
if 0:
|
||||||
inf = "sphinxmanual-in.cls"
|
inf = "sphinxmanual-in.cls"
|
||||||
outf = "sphinxmanual.cls"
|
outf = "sphinxmanual.cls"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import sys, json, re, os
|
import sys, json, re, os
|
||||||
# usage: json-cleanup-for-pdf.py <int>
|
# usage: json-cleanup-for-pdf.py <int>
|
||||||
# if int>0, disable PDF mode (only do WWW cleanup)
|
# if int>0, disable PDF mode (only do WWW cleanup; note that metadata.name still needs to be cleaned up manually)
|
||||||
|
|
||||||
# disableWrites = True # debugging
|
# disableWrites = True # debugging
|
||||||
|
|
||||||
@ -13,12 +13,12 @@ if len(sys.argv)>1:
|
|||||||
pdfMode = False
|
pdfMode = False
|
||||||
|
|
||||||
fileList = [
|
fileList = [
|
||||||
"diffphys-code-burgers.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF
|
"diffphys-code-burgers.ipynb", "diffphys-code-ns.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF
|
||||||
"bayesian-code.ipynb", "supervised-airfoils.ipynb" # pytorch
|
"bayesian-code.ipynb", "supervised-airfoils.ipynb" # pytorch
|
||||||
]
|
]
|
||||||
|
|
||||||
#fileList = [ "diffphys-code-burgers.ipynb"] # debug
|
#fileList = [ "diffphys-code-burgers.ipynb"] # debug
|
||||||
#fileList = [ "diffphys-code-sol.ipynb"] # debug
|
#fileList = [ "diffphys-code-ns.ipynb"] # debug
|
||||||
|
|
||||||
|
|
||||||
# main
|
# main
|
||||||
@ -45,16 +45,18 @@ for fnOut in fileList:
|
|||||||
|
|
||||||
#print(d.keys()) #print(d["cells"][0].keys())
|
#print(d.keys()) #print(d["cells"][0].keys())
|
||||||
|
|
||||||
# remove TF / pytorch warnings
|
# remove TF / pytorch warnings: build a list of regular expressions to search for
|
||||||
re1 = re.compile(r"WARNING:tensorflow:")
|
res = []
|
||||||
re2 = re.compile(r"UserWarning:")
|
res.append( re.compile(r"WARNING:tensorflow:") )
|
||||||
re4 = re.compile(r"DeprecationWarning:")
|
res.append( re.compile(r"UserWarning:") )
|
||||||
re5 = re.compile(r"InsecureRequestWarning:") # for https download
|
res.append( re.compile(r"DeprecationWarning:") )
|
||||||
|
res.append( re.compile(r"InsecureRequestWarning") ) # for https download
|
||||||
|
res.append( re.compile(r"Building wheel") ) # phiflow install, also gives weird unicode characters
|
||||||
# remove all "warnings.warn" from phiflow?
|
# remove all "warnings.warn" from phiflow?
|
||||||
|
|
||||||
# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"
|
# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"
|
||||||
re3 = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
|
reD = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
|
||||||
re3t = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"
|
reDt = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"
|
||||||
|
|
||||||
t="cells"
|
t="cells"
|
||||||
okay = 0
|
okay = 0
|
||||||
@ -75,7 +77,7 @@ for fnOut in fileList:
|
|||||||
for j in range(len( d[t][i]["source"] )):
|
for j in range(len( d[t][i]["source"] )):
|
||||||
#print( d[t][i]["source"][j] )
|
#print( d[t][i]["source"][j] )
|
||||||
#print( type(d[t][i]["source"][j] ))
|
#print( type(d[t][i]["source"][j] ))
|
||||||
dsOut = re3.sub( re3t, d[t][i]["source"][j] ) # replace long number string (only for burgers)
|
dsOut = reD.sub( reDt, d[t][i]["source"][j] ) # replace long number string (only for burgers)
|
||||||
d[t][i]["source"][j] = dsOut
|
d[t][i]["source"][j] = dsOut
|
||||||
deletes = deletes+1
|
deletes = deletes+1
|
||||||
#print( d[t][i]["source"][j] +"\n >>> \n" +d2 )
|
#print( d[t][i]["source"][j] +"\n >>> \n" +d2 )
|
||||||
@ -91,12 +93,14 @@ for fnOut in fileList:
|
|||||||
|
|
||||||
dell = [] # collect entries to delete
|
dell = [] # collect entries to delete
|
||||||
for k in range( len( d[t][i]["outputs"][j]["text"] ) ):
|
for k in range( len( d[t][i]["outputs"][j]["text"] ) ):
|
||||||
nums = []
|
#print(" tout "+ d[t][i]["outputs"][j]["text"][k] ) # debug , print all lines
|
||||||
nums.append( re1.search( d[t][i]["outputs"][j]["text"][k] ) )
|
nums = []; all_good = True
|
||||||
nums.append( re2.search( d[t][i]["outputs"][j]["text"][k] ) )
|
for rr in range(len(res)):
|
||||||
nums.append( re4.search( d[t][i]["outputs"][j]["text"][k] ) )
|
nums.append( res[rr].search( d[t][i]["outputs"][j]["text"][k] ) )
|
||||||
nums.append( re5.search( d[t][i]["outputs"][j]["text"][k] ) )
|
if nums[-1] is not None:
|
||||||
if (nums[0] is None) and (nums[1] is None):
|
all_good = False # skip!
|
||||||
|
|
||||||
|
if all_good:
|
||||||
okay = okay+1
|
okay = okay+1
|
||||||
else: # delete line "dell"
|
else: # delete line "dell"
|
||||||
deletes = deletes+1
|
deletes = deletes+1
|
||||||
|
@ -5,7 +5,7 @@ echo WARNING - still requires one manual quit of first pdf/latex pass, use shift
|
|||||||
echo
|
echo
|
||||||
|
|
||||||
# do clean git checkout for changes from json-cleanup-for-pdf.py?
|
# do clean git checkout for changes from json-cleanup-for-pdf.py?
|
||||||
# git checkout diffphys-code-burgers.ipynb diffphys-code-sol.ipynb physicalloss-code.ipynb bayesian-code.ipynb supervised-airfoils.ipynb
|
# git checkout diffphys-code-burgers.ipynb diffphys-code-ns.ipynb diffphys-code-sol.ipynb physicalloss-code.ipynb bayesian-code.ipynb supervised-airfoils.ipynb
|
||||||
|
|
||||||
# warning - modifies notebooks!
|
# warning - modifies notebooks!
|
||||||
python3.7 json-cleanup-for-pdf.py
|
python3.7 json-cleanup-for-pdf.py
|
||||||
@ -28,7 +28,7 @@ mv sphinxmanual.cls sphinxmanual-in.cls
|
|||||||
python3.7 ../../fixup-latex.py
|
python3.7 ../../fixup-latex.py
|
||||||
# generates book-in2.tex
|
# generates book-in2.tex
|
||||||
|
|
||||||
# remove unicode chars
|
# remove unicode chars via unix iconv
|
||||||
iconv -c -f utf-8 -t ascii book-in2.tex > book.tex
|
iconv -c -f utf-8 -t ascii book-in2.tex > book.tex
|
||||||
|
|
||||||
# finally run pdflatex, now it should work:
|
# finally run pdflatex, now it should work:
|
||||||
@ -36,8 +36,8 @@ iconv -c -f utf-8 -t ascii book-in2.tex > book.tex
|
|||||||
pdflatex book
|
pdflatex book
|
||||||
pdflatex book
|
pdflatex book
|
||||||
|
|
||||||
|
# for convenience, archive results in main dir
|
||||||
mv book.pdf ../../book-pdflatex.pdf
|
mv book.pdf ../../book-pdflatex.pdf
|
||||||
|
|
||||||
tar czvf ../../pbdl-latex-for-arxiv.tar.gz *
|
tar czvf ../../pbdl-latex-for-arxiv.tar.gz *
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user