scripts for pdf

2021-06-30 17:03:07 +02:00
parent 665637bc04
commit dd5abbc430
3 changed files with 105 additions and 44 deletions
--- a/json-cleanup-for-pdf.py
+++ b/json-cleanup-for-pdf.py
@@ -1,51 +1,85 @@
-import json, re
+import json, re, os

-fn="diffphys-code-burgers.ipynb"
-fnOut="diffphys-code-burgers-r.ipynb"
+fileList = [ 
+	"diffphys-code-burgers.ipynb", "diffphys-code-sol.ipynb", "physicalloss-code.ipynb", # TF
+	"bayesian-code.ipynb", "supervised-airfoils.ipynb" # pytorch
+	]

-with open(fn) as file:
-	d = json.load(file)
+for fnOut in fileList:
+	fn = fnOut[:-5] + "bak"
+	print("renaming "+fnOut+ " to "+fn )
+	if os.path.isfile(fnOut):
+		os.rename(fnOut, fn)
+	if not os.path.isfile(fn):
+		print("Error: "+fn+" missing!")
+		exit(1)

-print(d.keys())
-#print(d["cells"][0].keys())
+	#continue # exit(1)

-re1 = re.compile(r"WARNING:tensorflow:")
+	#fn="diffphys-code-burgers.ipynb"
+	#fnOut="diffphys-code-burgers-r.ipynb"

-t="cells"
-for i in range(len(d[t])):
-	#for i in range(len(d[t])):
-		#print(d[t][0]["cell_type"])
-	#print(d[t][i]["cell_type"])
+	with open(fn) as file:
+		d = json.load(file)

-	# remove images after code
+	#print(d.keys()) #print(d["cells"][0].keys())

-	if d[t][i]["cell_type"]=="code":
-		#print(d[t][i].keys())
-		#d[t][i]["outputs"] = ""
-		#print(d[t][i]["outputs"])
+	re1 = re.compile(r"WARNING:tensorflow:")
+	re2 = re.compile(r"UserWarning:")

-		#print(len( d[t][i]["outputs"] ))
-		for j in range(len( d[t][i]["outputs"] )):
-			#print(type( d[t][i]["outputs"][j] ))
-			#print( d[t][i]["outputs"][j].keys() )
+	t="cells"
+	okay = 0
+	deletes = 0
+	for i in range(len(d[t])):
+		#for i in range(len(d[t])):
+			#print(d[t][0]["cell_type"])
+		#print(d[t][i]["cell_type"])

-			# images
-			if d[t][i]["outputs"][j]["output_type"]=="stream":
-				print(  len( d[t][i]["outputs"][j]["text"] ) )
+		# strip warning lines from the stream outputs of code cells

-				dell = []
-				for k in range(  len( d[t][i]["outputs"][j]["text"] )  ):
-					num = re1.search( d[t][i]["outputs"][j]["text"][k] )
-					if num is not None:
-						dell.append(d[t][i]["outputs"][j]["text"][k])
-						print( format(num) +"  " + d[t][i]["outputs"][j]["text"][k] ) # len( d[t][i]["outputs"][j]["text"][k] ) )
-				for dl in dell:
-					d[t][i]["outputs"][j]["text"].remove(dl)
+		if d[t][i]["cell_type"]=="code":
+			#print(d[t][i].keys())
+			#d[t][i]["outputs"] = ""
+			#print(d[t][i]["outputs"])

-				print( format( len( d[t][i]["outputs"][j]["text"] )) + " A")
+			#print(len( d[t][i]["outputs"] ))
+			for j in range(len( d[t][i]["outputs"] )):
+				#print(type( d[t][i]["outputs"][j] ))
+				#print( d[t][i]["outputs"][j].keys() )

-#print(d["cells"])
+				# only "stream" outputs are filtered; other output types are left untouched
+				if d[t][i]["outputs"][j]["output_type"]=="stream":
+					print(  len( d[t][i]["outputs"][j]["text"] ) )

-with open(fnOut,'w') as fileOut:
-	json.dump(d,fileOut, indent=1, sort_keys=True)
+					dell = [] # collect entries to delete
+					for k in range(  len( d[t][i]["outputs"][j]["text"] )  ):
+						nums = []
+						nums.append( re1.search( d[t][i]["outputs"][j]["text"][k] ) )
+						nums.append( re2.search( d[t][i]["outputs"][j]["text"][k] ) )
+						if (nums[0] is None) and (nums[1] is None):
+							okay = okay+1
+						else: # at least one pattern matched: queue this line in "dell" for removal
+							deletes = deletes+1
+							dell.append(d[t][i]["outputs"][j]["text"][k])
+							print( format(nums) +"  " + d[t][i]["outputs"][j]["text"][k] ) # len( d[t][i]["outputs"][j]["text"][k] ) )
+
+					for dl in dell:
+						d[t][i]["outputs"][j]["text"].remove(dl)
+
+					print( format( len( d[t][i]["outputs"][j]["text"] )) + " A")
+
+	#print(d["cells"])
+
+	if deletes==0:
+		print("Warning: Nothing found in "+fn+"!")
+		if not os.path.isfile(fnOut):
+			os.rename(fn, fnOut)
+		else:
+			print("Error, both files exist!?")
+			exit(1)
+
+	else:
+		print(" ... writing "+fnOut )
+		with open(fnOut,'w') as fileOut:
+			json.dump(d,fileOut, indent=1, sort_keys=True)