# pbdl-book/json-cleanup-for-pdf.py

import sys, json, re, os
# usage: json-cleanup-for-pdf.py <int>
# if int>0, disable PDF mode (only do WWW cleanup, note metadata.name still needs to be cleaned up manually)

# disableWrites = True # debugging

# PDF mode is the default; it is switched off only when the first command
# line argument parses to a positive integer.
pdfMode = True

print(format(sys.argv))
# A non-integer first argument raises ValueError from int() and aborts the run.
if len(sys.argv) > 1 and int(sys.argv[1]) > 0:
    print("WWW mode on")
    pdfMode = False

# Notebooks to clean up, given as paths relative to the working directory.
# The trailing comments name the framework each group of notebooks uses.
fileList = [
    # TF
    "diffphys-code-burgers.ipynb",
    "diffphys-code-ns.ipynb",
    "diffphys-code-sol.ipynb",
    "physicalloss-code.ipynb",
    # pytorch
    "bayesian-code.ipynb",
    "supervised-airfoils.ipynb",
    # phiflow
    "reinflearn-code.ipynb",
    # jax
    "physgrad-comparison.ipynb",
]

#fileList = [ "diffphys-code-burgers.ipynb"] # debug, only 1 file
#fileList = [ "diffphys-code-ns.ipynb"] # debug, only 1 file

# main

# remove TF / pytorch warnings, build list of regular expressions to search for
# (compiled once here instead of once per notebook)
res = [
    re.compile(r"WARNING:tensorflow:"),
    re.compile(r"UserWarning:"),
    re.compile(r"DeprecationWarning:"),
    re.compile(r"InsecureRequestWarning"),  # for https download
    re.compile(r"Building wheel"),  # phiflow install, also gives weird unicode characters
    re.compile(r"warnings.warn"),  # phiflow warnings
    re.compile(r"WARNING:absl"),  # jax warnings
]

# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"
reD = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
reDt = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"


def _backup_name(fnOut):
    """Return the first backup path for *fnOut* that does not exist yet.

    The last five characters of *fnOut* are replaced by "bak" plus a counter,
    so "x.ipynb" yields "x.bak0", "x.bak1", ...  A warning is printed once for
    every candidate that is already taken.
    """
    fn0 = fnOut[:-5] + "bak"
    cnt = 0
    fn = fn0 + format(cnt)
    while os.path.isfile(fn):
        print("Warning: "+fn+" already exists!")
        cnt += 1
        fn = fn0 + format(cnt)
    return fn


def _is_noise(line):
    """Return True if *line* matches any of the patterns in ``res``."""
    return any(pattern.search(line) for pattern in res)


def _clean_notebook(d, pdfMode):
    """Clean the code cells of the loaded notebook dict *d* in place.

    In PDF mode every source line of every code cell is passed through the
    ``reD`` -> ``reDt`` substitution.  In both modes, lines of "stream"
    outputs that match a pattern in ``res`` are dropped.

    Assumes "source" and "text" are lists of strings, as they were indexed
    in the original loop -- TODO confirm for notebooks from other writers.

    Returns the change counter that decides whether the notebook is rewritten.
    """
    deletes = 0
    for cell in d["cells"]:
        if cell["cell_type"] != "code":
            continue

        if pdfMode:
            source = cell["source"]
            for j, line in enumerate(source):
                # replace long number string (only for burgers)
                source[j] = reD.sub(reDt, line)
                # NOTE(review): counted per source line, whether or not the
                # substitution changed anything, so in PDF mode any notebook
                # with code is rewritten -- confirm this is intended.
                deletes += 1

        for output in cell["outputs"]:
            # only text streams (stdout/stderr) are filtered
            if output["output_type"] != "stream":
                continue
            text = output["text"]
            kept = [line for line in text if not _is_noise(line)]
            removed = len(text) - len(kept)
            if removed:
                output["text"] = kept
                deletes += removed
    return deletes


for fnOut in fileList:
    # create backups
    fn = _backup_name(fnOut)

    print("renaming "+fnOut+ " to "+fn )
    if os.path.isfile(fnOut):
        os.rename(fnOut, fn)
    if not os.path.isfile(fn):
        print("Error: "+fn+" missing!")
        sys.exit(1)

    # notebooks are JSON; read them as UTF-8 regardless of the locale
    with open(fn, encoding="utf-8") as file:
        d = json.load(file)

    deletes = _clean_notebook(d, pdfMode)

    if deletes == 0:
        # nothing changed: move the backup back to the original name
        print("Warning: Nothing found in "+fn+"!")
        if not os.path.isfile(fnOut):
            os.rename(fn, fnOut)
        else:
            print("Error, both files exist!?")
            sys.exit(1)
    else:
        print(" ... writing "+fnOut )
        with open(fnOut, 'w', encoding="utf-8") as fileOut:
            json.dump(d, fileOut, indent=1, sort_keys=True)