From 0588e40d8c073ed784712f898f1c71f1dcbb9997 Mon Sep 17 00:00:00 2001
From: NT <nils.thuerey@tum.de>
Date: Tue, 17 Aug 2021 21:11:13 +0200
Subject: [PATCH] updated cleanup and unicode scripts

---
 fixup-latex.py          | 55 ++++++++++++++++++++++++++++++-----------
 json-cleanup-for-pdf.py | 38 +++++++++++++++-------------
 make-pdf.sh             |  6 ++---
 3 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/fixup-latex.py b/fixup-latex.py
index d961c44..0ef66cc 100644
--- a/fixup-latex.py
+++ b/fixup-latex.py
@@ -2,13 +2,15 @@ import sys, os, re
 
 # fix jupyter book latex output
 
-#filter_mem = re.compile(r".+\|\s+(\d+)MiB\s+/\s+(\d+)MiB\s+\|")#')
-#ft2 = re.compile(r"tst")
-#ft3 = re.compile(r"’")
-#fte = re.compile(r"👋")
+# TODOs
+# - check whether to remove the full "name": "stderr" {} block (grep stderr *ipynb),
+# 		or whole empty warning/error blocks
+# - replace the phi symbol with text in phiflow
+
+# older tests
+#ft1 = re.compile(r"’")
+#ft2 = re.compile(r"👋")
 
-# TODO check, remove full "name": "stderr" {} block?  (grep stderr *ipynb) ???
-# TODO , replace phi symbol w text in phiflow
 
 inf  = "book-in.tex" 
 outf = "book-in2.tex"
@@ -26,12 +28,12 @@ reSkip.append( re.compile(r"ucharclasses") ) ; reSCnt.append( 1 )
 reSkip.append( re.compile(r"unicode-math") ) ; reSCnt.append( 1 )
 
 # latex fixup, remove references chapter
-reSkip.append( re.compile(r"chapter.References" ) )
-reSkip.append( re.compile(r"detokenize.references.references" ) )
+reSkip.append( re.compile(r"chapter.References" ) ); reSCnt.append( 1 )
+reSkip.append( re.compile(r"detokenize.references.references" ) ); reSCnt.append( 1 )
 
-#reSkip.append( re.compile(r"") )
-#reSkip.append( re.compile(r"") )
-#reSkip.append( re.compile(r"") )
+#reSkip.append( re.compile(r"") ); reSCnt.append( 1 )
+#reSkip.append( re.compile(r"") ); reSCnt.append( 1 )
+#reSkip.append( re.compile(r"") ); reSCnt.append( 1 )
 
 # ugly, manually fix citations in captions one by one
 recs = []; rect = []
@@ -44,6 +46,20 @@ rect.append( "parametrized GAN {[}\\\\protect\\\\hyperlink{cite.references:id2}{
 recs.append( re.compile(r"approach using continuous convolutions {\[}.hyperlink{cite.references:id12}{UPTK19}{\]}" ) )
 rect.append( "approach using continuous convolutions {[}\\\\protect\\\\hyperlink{cite.references:id12}{UPTK19}{]}" )
 
+# fixup unicode symbols 
+
+recs.append( re.compile(r"’" ) ) # unicode ' 
+rect.append( "\'" )
+
+recs.append( re.compile(r"Φ") ) # phiflow , ... differentiable simulation framework ...
+rect.append( "$\\\\phi$" )
+
+recs.append( re.compile(r"“") ) # "..."
+rect.append( "\'\'" )
+
+recs.append( re.compile(r"”") )
+rect.append( "\'\'" )
+
 # fixup title , cumbersome...
 
 # fix backslashes...  saves at least typing a few of them! still needs manual \ -> \\ , could be done better
@@ -74,6 +90,9 @@ rect.append( 'author{}' )
 recs.append( re.compile(r"date{(.*)}") )
 rect.append( r'date{\\centering{\1}}' )
 
+#print(len(rect))
+#print(len(recs))
+#exit(1)
 
 
 # ---
@@ -81,6 +100,11 @@ rect.append( r'date{\\centering{\1}}' )
 # only do replacements via recs for book.tex , via applyRecs=True
 def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
 	print("Fixup, "+inf+" -> "+outf+" ")
+
+	if len(reSkip) != len(reSCnt): # sanity check
+		print("Error for "+inf+" reSkip cnt: " + format([ len(reSkip), len(reSCnt) ]) )
+		exit(1)
+
 	with open(outf, 'w') as fout:
 		with open(inf, 'r') as f:
 			c = 0
@@ -89,6 +113,7 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
 			for line in iter(f.readline, ''):
 
 				# skip lines?
+				rSkip = -1
 				if skip==0:
 					for r in range(len(reSkip)):
 						t = reSkip[r].search(str(line))
@@ -96,14 +121,16 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
 							#print(format(c)+" skip due to '" + format(t) +"',  RE #"+format(r)+" , skip "+format(reSCnt[r]) )  # debug
 							skip = reSCnt[r]
 							skipTot += reSCnt[r]
+							rSkip = r
 
 				if skip>0:
 					skip = skip-1
-					fout.write("% SKIP due to RE #"+format(r)+" , L"+format(reSCnt[r]) +"   "+line)
+					fout.write("% SKIP due to RE #"+format(rSkip)+" , L"+format(reSCnt[rSkip]) +"   "+line)
 					#print("S "+line[:-1]) # debug
 				else:
 					if applyRecs:
 						# fix captions and apply other latex replacements
+						#print(len(rect)); print(len(recs))
 						for i in range(len(recs)):
 							line = recs[i].sub( rect[i], line )  # replace all
 
@@ -126,7 +153,7 @@ def parseF(inf,outf,reSkip,reSCnt,applyRecs=False):
 
 parseF(inf,outf,reSkip,reSCnt,applyRecs=True)
 
-#exit(1); print("debug exit!"); exit(1)
+# print("debug exit!"); exit(1)
 
 #---
 
@@ -158,7 +185,7 @@ parseF(inf,outf,reSkip,reSCnt)
 
 #---
 
-# disable for now?
+# disable for now? keep openRight
 if 0:
 	inf  = "sphinxmanual-in.cls" 
 	outf = "sphinxmanual.cls"
diff --git a/json-cleanup-for-pdf.py b/json-cleanup-for-pdf.py
index 597a2e8..d0cb59c 100644
--- a/json-cleanup-for-pdf.py
+++ b/json-cleanup-for-pdf.py
@@ -1,6 +1,6 @@
 import sys, json, re, os
 # usage: json-cleanup-for-pdf.py <int>
-# if int>0, disable PDF mode (only do WWW cleanup)
+# if int>0, disable PDF mode (only do WWW cleanup; note that metadata.name still needs to be cleaned up manually)
 
 # disableWrites = True # debugging
 
@@ -13,12 +13,12 @@ if len(sys.argv)>1:
 		pdfMode = False
 
 fileList = [ 
-	"diffphys-code-burgers.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF
+	"diffphys-code-burgers.ipynb", "diffphys-code-ns.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF
 	"bayesian-code.ipynb", "supervised-airfoils.ipynb" # pytorch
 	]
 
 #fileList = [ "diffphys-code-burgers.ipynb"] # debug
-#fileList = [ "diffphys-code-sol.ipynb"] # debug
+#fileList = [ "diffphys-code-ns.ipynb"] # debug
 
 
 # main
@@ -45,16 +45,18 @@ for fnOut in fileList:
 
 	#print(d.keys()) #print(d["cells"][0].keys())
 
-	# remove TF / pytorch warnings
-	re1 = re.compile(r"WARNING:tensorflow:")
-	re2 = re.compile(r"UserWarning:")
-	re4 = re.compile(r"DeprecationWarning:")
-	re5 = re.compile(r"InsecureRequestWarning:") # for https download
+	# remove TF / pytorch warnings, build list of regular expressions to search for
+	res = []
+	res.append( re.compile(r"WARNING:tensorflow:") )
+	res.append( re.compile(r"UserWarning:") )
+	res.append( re.compile(r"DeprecationWarning:") )
+	res.append( re.compile(r"InsecureRequestWarning") ) # for https download
+	res.append( re.compile(r"Building wheel") ) # phiflow install, also gives weird unicode characters
 	# remove all "warnings.warn" from phiflow?
 
 	# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"
-	re3 = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
-	re3t = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"
+	reD = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
+	reDt = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"
 
 	t="cells"
 	okay = 0
@@ -75,7 +77,7 @@ for fnOut in fileList:
 				for j in range(len( d[t][i]["source"] )):
 					#print( d[t][i]["source"][j] )
 					#print( type(d[t][i]["source"][j] ))
-					dsOut = re3.sub( re3t, d[t][i]["source"][j] )  # replace long number string (only for burgers)
+					dsOut = reD.sub( reDt, d[t][i]["source"][j] )  # replace long number string (only for burgers)
 					d[t][i]["source"][j] = dsOut
 					deletes = deletes+1
 					#print( d[t][i]["source"][j] +"\n >>> \n" +d2 )
@@ -91,12 +93,14 @@ for fnOut in fileList:
 
 					dell = [] # collect entries to delete
 					for k in range(  len( d[t][i]["outputs"][j]["text"] )  ):
-						nums = []
-						nums.append( re1.search( d[t][i]["outputs"][j]["text"][k] ) )
-						nums.append( re2.search( d[t][i]["outputs"][j]["text"][k] ) )
-						nums.append( re4.search( d[t][i]["outputs"][j]["text"][k] ) )
-						nums.append( re5.search( d[t][i]["outputs"][j]["text"][k] ) )
-						if (nums[0] is None) and (nums[1] is None):
+						#print(" tout "+   d[t][i]["outputs"][j]["text"][k] ) # debug , print all lines
+						nums = []; all_good = True
+						for rr in range(len(res)):
+							nums.append( res[rr].search( d[t][i]["outputs"][j]["text"][k] ) )
+							if nums[-1] is not None:
+								all_good = False # skip!
+
+						if all_good:
 							okay = okay+1
 						else: # delete line "dell"
 							deletes = deletes+1
diff --git a/make-pdf.sh b/make-pdf.sh
index c5bec9e..e755bbf 100755
--- a/make-pdf.sh
+++ b/make-pdf.sh
@@ -5,7 +5,7 @@ echo WARNING - still requires one manual quit of first pdf/latex pass, use shift
 echo
 
 # do clean git checkout for changes from json-cleanup-for-pdf.py?
-# git checkout diffphys-code-burgers.ipynb diffphys-code-sol.ipynb physicalloss-code.ipynb bayesian-code.ipynb supervised-airfoils.ipynb
+# git checkout diffphys-code-burgers.ipynb diffphys-code-ns.ipynb diffphys-code-sol.ipynb physicalloss-code.ipynb bayesian-code.ipynb supervised-airfoils.ipynb
 
 # warning - modifies notebooks!
 python3.7 json-cleanup-for-pdf.py
@@ -28,7 +28,7 @@ mv sphinxmanual.cls sphinxmanual-in.cls
 python3.7 ../../fixup-latex.py
 # generates book-in2.tex
 
-# remove unicode chars
+# remove unicode chars via unix iconv
 iconv -c -f utf-8 -t ascii book-in2.tex > book.tex
 
 # finally run pdflatex, now it should work:
@@ -36,8 +36,8 @@ iconv -c -f utf-8 -t ascii book-in2.tex > book.tex
 pdflatex book
 pdflatex book
 
+# for convenience, archive results in main dir
 mv book.pdf ../../book-pdflatex.pdf
-
 tar czvf ../../pbdl-latex-for-arxiv.tar.gz *