pbdl-book/json-cleanup-for-pdf.py

import sys, json, re, os
# usage: json-cleanup-for-pdf.py <int>
# if int>0, disable PDF mode (only do WWW cleanup, note metadata.name still needs to be cleaned up manually)

# disableWrites = True # debugging

# Mode switch: PDF mode is the default; a positive integer as first
# command-line argument turns it off, so only the WWW cleanup is done.
pdfMode = True

print(format(sys.argv))
if len(sys.argv)>1:
	try:
		wwwFlag = int(sys.argv[1])
	except ValueError:
		# a non-numeric argument previously ended in a bare traceback; report the usage instead
		print("Error: expected an integer argument, got '"+sys.argv[1]+"' (usage: json-cleanup-for-pdf.py <int>)")
		sys.exit(1)
	if wwwFlag>0:
		print("WWW mode on")
		pdfMode = False

# Notebooks to process, grouped by the framework tag given for each group.
fileList = [
	# TF
	"diffphys-code-burgers.ipynb",
	"diffphys-code-ns.ipynb",
	"diffphys-code-sol.ipynb",
	"physicalloss-code.ipynb",
	# pytorch
	"bayesian-code.ipynb",
	"supervised-airfoils.ipynb",
	# phiflow
	"reinflearn-code.ipynb",
	# jax
	"physgrad-comparison.ipynb",
]

#fileList = [ "diffphys-code-burgers.ipynb"] # debug, only 1 file
#fileList = [ "diffphys-code-ns.ipynb"] # debug, only 1 file


# main

# Stream-output lines matching any of these patterns are treated as framework
# noise and dropped. Compiled once here instead of once per notebook.
noisePatterns = [
	re.compile(r"WARNING:tensorflow:"),
	re.compile(r"UserWarning:"),
	re.compile(r"DeprecationWarning:"),
	re.compile(r"InsecureRequestWarning"), # for https download
	re.compile(r"Building wheel"), # phiflow install, also gives weird unicode characters
	re.compile(r"warnings.warn"), # phiflow warnings
	re.compile(r"WARNING:absl"), # jax warnings
]

# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"
reD = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
reDt = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"


def nextBackupName(fnOut):
	"""Return the first backup name "<name>.bak<N>" (N = 0, 1, ...) that is not an existing file.

	Prints a warning for every candidate that is already taken.
	"""
	fn0 = fnOut[:-5] + "bak" # drops the trailing "ipynb" but keeps the dot: "x.ipynb" -> "x.bak"
	cnt = 0
	fn = fn0 + format(cnt)
	while os.path.isfile(fn):
		print("Warning: "+fn+" already exists!")
		cnt += 1 # advance before retrying, so no candidate is tested (and reported) twice
		fn = fn0 + format(cnt)
	return fn


def isNoise(line):
	"""Return True if the line matches at least one of the noise patterns."""
	return any(p.search(line) is not None for p in noisePatterns)


def stripNoise(text):
	"""Drop noise lines from the text of a stream output.

	text may be a list of lines or one string (the notebook format allows
	both for multi-line strings). Returns a pair (cleaned, removed): cleaned
	has the same type as text, removed is the number of lines dropped.
	"""
	if isinstance(text, str):
		lines = text.splitlines(keepends=True) # keepends, so joining restores the kept lines unchanged
		kept = [ln for ln in lines if not isNoise(ln)]
		return "".join(kept), len(lines) - len(kept)
	kept = [ln for ln in text if not isNoise(ln)]
	return kept, len(text) - len(kept)


# main loop: back up each notebook, clean its code cells, then write the
# result under the original name (or restore the original if nothing changed)
for fnOut in fileList:
	# create backups
	fn = nextBackupName(fnOut)

	print("renaming "+fnOut+ " to "+fn )
	if os.path.isfile(fnOut):
		os.rename(fnOut, fn)
	if not os.path.isfile(fn):
		# reached when the notebook itself did not exist, so nothing was renamed
		print("Error: "+fn+" missing!")
		sys.exit(1)

	# notebooks are UTF-8 JSON; do not depend on the platform default encoding
	with open(fn, encoding="utf-8") as file:
		d = json.load(file)

	deletes = 0
	for cell in d["cells"]:
		if cell["cell_type"] != "code":
			continue

		if pdfMode:
			# replace long number string (only for burgers)
			# NOTE(review): every source line is counted, whether or not the
			# pattern matched, so in PDF mode any notebook with non-empty code
			# is rewritten and the "Nothing found" branch below is not taken.
			# Kept as in the original - confirm this is intended.
			source = cell["source"]
			if isinstance(source, str):
				cell["source"] = reD.sub(reDt, source)
				deletes += len(source.splitlines())
			else:
				source[:] = [reD.sub(reDt, line) for line in source]
				deletes += len(source)

		for out in cell["outputs"]:
			# only stream outputs (stdout / stderr text) are filtered
			if out["output_type"] != "stream":
				continue
			out["text"], removed = stripNoise(out["text"])
			deletes += removed

	if deletes==0:
		# nothing changed: move the backup back to the original name
		print("Warning: Nothing found in "+fn+"!")
		if os.path.isfile(fnOut):
			print("Error, both files exist!?")
			sys.exit(1)
		os.rename(fn, fnOut)
	else:
		print(" ... writing "+fnOut )
		with open(fnOut, 'w', encoding="utf-8") as fileOut:
			json.dump(d, fileOut, indent=1, sort_keys=True)
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00			`import sys, json, re, os`
			`# usage: json-cleanup-for-pdf.py <int>`
updated cleanup and unicode scripts 2021-08-17 21:11:13 +02:00			`# if int>0, disable PDF mode (only do WWW cleanup, note metadata.name still needs to be cleaned up manually)`
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00
			`# disableWrites = True # debugging`

			`pdfMode = True`

			`print(format(sys.argv))`
			`if len(sys.argv)>1:`
			`if int(sys.argv[1])>0:`
			`print("WWW mode on")`
			`pdfMode = False`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`fileList = [`
updated cleanup and unicode scripts 2021-08-17 21:11:13 +02:00			`"diffphys-code-burgers.ipynb", "diffphys-code-ns.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF`
updated graphs for BNN code, removed phiflow warnings via json-cleanup script 2021-08-22 14:04:14 +02:00			`"bayesian-code.ipynb", "supervised-airfoils.ipynb", # pytorch`
			`"reinflearn-code.ipynb", # phiflow`
updated PG simple example 2022-03-18 06:57:00 +01:00			`"physgrad-comparison.ipynb", # jax`
scripts for pdf 2021-06-30 17:03:07 +02:00			`]`
pdf export helpers 2021-06-29 18:02:09 +02:00
updated graphs for BNN code, removed phiflow warnings via json-cleanup script 2021-08-22 14:04:14 +02:00			`#fileList = [ "diffphys-code-burgers.ipynb"] # debug, only 1 file`
			`#fileList = [ "diffphys-code-ns.ipynb"] # debug, only 1 file`
fixing commit, only json script 2021-06-30 18:55:15 +02:00
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00
			`# main`
fixing commit, only json script 2021-06-30 18:55:15 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`for fnOut in fileList:`
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00			`# create backups`
			`fn0 = fnOut[:-5] + "bak"`
			`fn = fn0 + "0"; cnt = 0`
			`while os.path.isfile(fn):`
updated graphs for BNN code, removed phiflow warnings via json-cleanup script 2021-08-22 14:04:14 +02:00			`#print("Error: "+fn+" already exists!"); exit(1)`
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00			`print("Warning: "+fn+" already exists!")`
			`fn = fn0 + format(cnt); cnt=cnt+1`

scripts for pdf 2021-06-30 17:03:07 +02:00			`print("renaming "+fnOut+ " to "+fn )`
			`if os.path.isfile(fnOut):`
			`os.rename(fnOut, fn)`
			`if not os.path.isfile(fn):`
			`print("Error: "+fn+" missing!")`
			`exit(1)`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`with open(fn) as file:`
			`d = json.load(file)`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`#print(d.keys()) #print(d["cells"][0].keys())`
pdf export helpers 2021-06-29 18:02:09 +02:00
updated cleanup and unicode scripts 2021-08-17 21:11:13 +02:00			`# remove TF / pytorch warnings, build list of regular expressions to search for`
			`res = []`
			`res.append( re.compile(r"WARNING:tensorflow:") )`
			`res.append( re.compile(r"UserWarning:") )`
			`res.append( re.compile(r"DeprecationWarning:") )`
			`res.append( re.compile(r"InsecureRequestWarning") ) # for https download`
			`res.append( re.compile(r"Building wheel") ) # phiflow install, also gives weird unicode characters`
updated graphs for BNN code, removed phiflow warnings via json-cleanup script 2021-08-22 14:04:14 +02:00			`res.append( re.compile(r"warnings.warn") ) # phiflow warnings`
updated PG simple example 2022-03-18 06:57:00 +01:00			`res.append( re.compile(r"WARNING:absl") ) # jax warnings`
added know your data section, minor cleanup 2021-08-03 21:55:42 +02:00			`# remove all "warnings.warn" from phiflow?`
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00
			`# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"`
updated cleanup and unicode scripts 2021-08-17 21:11:13 +02:00			`reD = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )`
			`reDt = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`t="cells"`
			`okay = 0`
			`deletes = 0`
			`for i in range(len(d[t])):`
			`#for i in range(len(d[t])):`
			`#print(d[t][0]["cell_type"])`
			`#print(d[t][i]["cell_type"])`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`# remove images after code`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`if d[t][i]["cell_type"]=="code":`
			`#print(d[t][i].keys())`
			`#d[t][i]["outputs"] = ""`
			`#print(d[t][i]["outputs"])`
pdf export helpers 2021-06-29 18:02:09 +02:00
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00			`if pdfMode:`
			`for j in range(len( d[t][i]["source"] )):`
			`#print( d[t][i]["source"][j] )`
			`#print( type(d[t][i]["source"][j] ))`
updated cleanup and unicode scripts 2021-08-17 21:11:13 +02:00			`dsOut = reD.sub( reDt, d[t][i]["source"][j] ) # replace long number string (only for burgers)`
added cleanup of notebooks for www version 2021-07-21 20:31:14 +02:00			`d[t][i]["source"][j] = dsOut`
			`deletes = deletes+1`
			`#print( d[t][i]["source"][j] +"\n >>> \n" +d2 )`
fixing commit, only json script 2021-06-30 18:55:15 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`#print(len( d[t][i]["outputs"] ))`
			`for j in range(len( d[t][i]["outputs"] )):`
			`#print(type( d[t][i]["outputs"][j] ))`
			`#print( d[t][i]["outputs"][j].keys() )`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`# images`
			`if d[t][i]["outputs"][j]["output_type"]=="stream":`
added missing image, minor fixes 2021-08-06 20:20:31 +02:00			`#print("len "+ len( d[t][i]["outputs"][j]["text"] ) )`
pdf export helpers 2021-06-29 18:02:09 +02:00
scripts for pdf 2021-06-30 17:03:07 +02:00			`dell = [] # collect entries to delete`
			`for k in range( len( d[t][i]["outputs"][j]["text"] ) ):`
updated cleanup and unicode scripts 2021-08-17 21:11:13 +02:00			`#print(" tout "+ d[t][i]["outputs"][j]["text"][k] ) # debug , print all lines`
			`nums = []; all_good = True`
			`for rr in range(len(res)):`
			`nums.append( res[rr].search( d[t][i]["outputs"][j]["text"][k] ) )`
			`if nums[-1] is not None:`
			`all_good = False # skip!`

			`if all_good:`
scripts for pdf 2021-06-30 17:03:07 +02:00			`okay = okay+1`
			`else: # delete line "dell"`
			`deletes = deletes+1`
			`dell.append(d[t][i]["outputs"][j]["text"][k])`
script cleanup 2021-06-30 22:03:39 +02:00			`#print( format(nums) +" " + d[t][i]["outputs"][j]["text"][k] ) # len( d[t][i]["outputs"][j]["text"][k] ) )`
scripts for pdf 2021-06-30 17:03:07 +02:00
			`for dl in dell:`
			`d[t][i]["outputs"][j]["text"].remove(dl)`

added missing image, minor fixes 2021-08-06 20:20:31 +02:00			`#print("len after "+format( len( d[t][i]["outputs"][j]["text"] )) + " A") # debug`
scripts for pdf 2021-06-30 17:03:07 +02:00
			`if deletes==0:`
			`print("Warning: Nothing found in "+fn+"!")`
			`if not os.path.isfile(fnOut):`
			`os.rename(fn, fnOut)`
			`else:`
			`print("Error, both files exist!?")`
			`exit(1)`

			`else:`
			`print(" ... writing "+fnOut )`
			`with open(fnOut,'w') as fileOut:`
			`json.dump(d,fileOut, indent=1, sort_keys=True)`
pdf export helpers 2021-06-29 18:02:09 +02:00