Skip to content

Commit

Permalink
feat: data denoising code
Browse files Browse the repository at this point in the history
  • Loading branch information
MalMyeong committed Jan 27, 2024
1 parent 33cb2bc commit 7067a3e
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions code/denoise/denoising_cg.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","metadata":{"id":"_xnMOsbqHz61"},"source":["# CycleGAN for Document Denoising"]},{"cell_type":"markdown","metadata":{"id":"e1_Y75QXJS6h"},"source":["## Set up the input pipeline"]},{"cell_type":"markdown","metadata":{"id":"5fGHWOKPX4ta"},"source":["Install the [tensorflow_examples](https://github.com/tensorflow/examples) package that enables importing of the generator and the discriminator."]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":12025,"status":"ok","timestamp":1656149639500,"user":{"displayName":"Tianxiang Song","userId":"08920423447324048972"},"user_tz":-60},"id":"bJ1ROiQxJ-vY","outputId":"87ea3f90-e5c4-47f2-df0a-610c2c757e80"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting git+https://github.com/tensorflow/examples.git\n"," Cloning https://github.com/tensorflow/examples.git to /tmp/pip-req-build-jng76rt3\n"," Running command git clone --filter=blob:none --quiet https://github.com/tensorflow/examples.git /tmp/pip-req-build-jng76rt3\n"," Resolved https://github.com/tensorflow/examples.git to commit fff4bcda7201645a1efaea4534403daf5fc03d42\n"," Preparing metadata (setup.py) ... \u001b[?25ldone\n","\u001b[?25hRequirement already satisfied: absl-py in /opt/conda/envs/denoising/lib/python3.10/site-packages (from tensorflow-examples==0.1703207612.1461250479831370929614362828255168868146460245314) (1.4.0)\n","Requirement already satisfied: six in /opt/conda/envs/denoising/lib/python3.10/site-packages (from tensorflow-examples==0.1703207612.1461250479831370929614362828255168868146460245314) (1.16.0)\n","\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n","\u001b[0m"]}],"source":["!pip install git+https://github.com/tensorflow/examples.git"]},{"cell_type":"markdown","metadata":{"id":"W6h-EBccgb67"},"source":["## Import libraries and data"]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":3798,"status":"ok","timestamp":1656149643261,"user":{"displayName":"Tianxiang Song","userId":"08920423447324048972"},"user_tz":-60},"id":"YfIk2es3hJEd"},"outputs":[{"name":"stderr","output_type":"stream","text":["2024-01-27 22:50:48.658522: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n","2024-01-27 22:50:48.712379: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-01-27 22:50:48.712429: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-01-27 22:50:48.713990: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-01-27 22:50:48.722454: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n","2024-01-27 22:50:48.723596: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-01-27 22:50:49.801315: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find 
TensorRT\n","/opt/conda/envs/denoising/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).\n"," from pandas.core import (\n"]}],"source":["import tensorflow as tf\n","from tensorflow_examples.models.pix2pix import pix2pix\n","\n","import os\n","import cv2\n","import numpy as np\n","from PIL import Image\n","\n","AUTOTUNE = tf.data.AUTOTUNE"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":280,"status":"ok","timestamp":1656149662493,"user":{"displayName":"Tianxiang Song","userId":"08920423447324048972"},"user_tz":-60},"id":"kcwwQsfEqlfH","outputId":"4b449baf-29ec-449c-a61f-5180fba008af"},"outputs":[{"name":"stdout","output_type":"stream","text":["Sat Jan 27 02:09:17 2024 \n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 470.199.02 Driver Version: 470.199.02 CUDA Version: 11.4 |\n","|-------------------------------+----------------------+----------------------+\n","| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. |\n","|===============================+======================+======================|\n","| 0 Tesla V100-SXM2... 
# Root directory that holds every image folder used by this notebook.
path = '/data/ephemeral/home/data/medical/img/'
# Sub-directory (relative to `path`) containing the images to be denoised.
to_process_path = 'train/'
# Sub-directory (relative to `path`) where the denoised images are written.
processed_path = 'processed_train/'

# File extensions accepted as images; the comparison is case-insensitive.
IMAGE_EXTENSIONS = ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp')

# Sorted filenames of the images to process.
# Directories (for example the `.ipynb_checkpoints` folder Jupyter creates) and
# non-image files are skipped: `cv2.imread` returns None for them, which would
# make the preprocessing step fail.
_input_dir = os.path.join(path, to_process_path)
to_process_img = sorted(
    name
    for name in os.listdir(_input_dir)
    if os.path.isfile(os.path.join(_input_dir, name))
    and name.lower().endswith(IMAGE_EXTENSIONS)
)
# Fixed spatial size every image is resized to before it is fed to the generator.
IMG_WIDTH = 3072
IMG_HEIGHT = 4096


def process_image(path):
    """Load one image and resize it to the fixed network input size.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        A tuple ``(img, original_size)``. ``img`` is a float32 array of shape
        ``(IMG_HEIGHT, IMG_WIDTH, 3)`` whose pixel values have been divided by
        255, in the BGR channel order produced by ``cv2.imread``.
        ``original_size`` is the ``(height, width)`` of the image before resizing.

    Raises:
        ValueError: If OpenCV cannot read an image from ``path``.
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError(f"Could not read image: {path}")
    original_size = img.shape[:2]
    # cv2.resize expects the target size as (width, height).
    img = cv2.resize(img.astype("float32"), (IMG_WIDTH, IMG_HEIGHT))
    return img / 255.0, original_size


# Full paths of the input images, in the same order as `to_process_img`.
image_files = [os.path.join(path, to_process_path, f) for f in to_process_img]

# Original (height, width) of each image, needed to restore its size after denoising.
original_sizes = []
# Pre-allocate the image stack so the full-resolution images are held only once
# (collecting them in a list and converting afterwards briefly needs twice the memory).
chinese_invoice = np.empty(
    (len(image_files), IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.float32
)
for index, image_file in enumerate(image_files):
    img, orig_size = process_image(image_file)
    chinese_invoice[index] = img
    original_sizes.append(orig_size)
# Number of channels produced by each generator.
OUTPUT_CHANNELS = 3


def _build_generator():
    """Return a pix2pix U-Net generator that uses instance normalisation."""
    return pix2pix.unet_generator(OUTPUT_CHANNELS, norm_type='instancenorm')


def _build_discriminator():
    """Return a pix2pix discriminator that uses instance normalisation and takes no target image."""
    return pix2pix.discriminator(norm_type='instancenorm', target=False)


def _build_optimizer():
    """Return a fresh Adam optimizer (learning rate 2e-4, beta_1 0.5)."""
    return tf.keras.optimizers.Adam(2e-4, beta_1=0.5)


# The CycleGAN pair of generators and the pair of discriminators.
generator_g = _build_generator()
generator_f = _build_generator()

discriminator_x = _build_discriminator()
discriminator_y = _build_discriminator()

# One independent optimizer per network.
generator_g_optimizer = _build_optimizer()
generator_f_optimizer = _build_optimizer()

discriminator_x_optimizer = _build_optimizer()
discriminator_y_optimizer = _build_optimizer()
# Directory holding the trained CycleGAN checkpoints.
checkpoint_path = "/data/ephemeral/home/DocumentDenoise/checkpoints/cycleGAN"

# The checkpoint tracks all four networks and their optimizers.
ckpt = tf.train.Checkpoint(generator_g=generator_g,
                           generator_f=generator_f,
                           discriminator_x=discriminator_x,
                           discriminator_y=discriminator_y,
                           generator_g_optimizer=generator_g_optimizer,
                           generator_f_optimizer=generator_f_optimizer,
                           discriminator_x_optimizer=discriminator_x_optimizer,
                           discriminator_y_optimizer=discriminator_y_optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=1)

# This notebook only runs inference, so trained weights are mandatory: without
# them the generator would be randomly initialised and the saved images garbage.
if not ckpt_manager.latest_checkpoint:
    raise FileNotFoundError(f"No checkpoint found in {checkpoint_path}")
# expect_partial(): the optimizers are never stepped here, so their saved state
# is intentionally left unused; this silences the resulting restore warnings.
ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
print ('Latest checkpoint restored!!')


def tensor_to_image(tensor):
    """Convert an image tensor scaled to [0, 1] into a PIL image.

    Args:
        tensor: Array-like image of rank 3, or rank 4 with a leading batch
            dimension of size 1.

    Returns:
        A ``PIL.Image.Image`` built from the 8-bit pixel values.

    Raises:
        AssertionError: If a rank-4 input holds more than one image.
    """
    pixels = np.asarray(tensor, dtype=np.float32) * 255
    # The generator output is not guaranteed to stay inside [0, 1]; clip before
    # the cast, because out-of-range values wrap around when converted to uint8
    # (e.g. a slightly negative value would turn into a bright pixel).
    pixels = np.clip(pixels, 0, 255).astype(np.uint8)
    if np.ndim(pixels) > 3:
        assert pixels.shape[0] == 1, "expected a batch containing exactly one image"
        pixels = pixels[0]
    return Image.fromarray(pixels)


# Make sure the destination exists; PIL does not create missing directories.
output_dir = os.path.join(path, processed_path)
os.makedirs(output_dir, exist_ok=True)

# Denoise every image and save it under its original filename and size.
for filename, image, (orig_height, orig_width) in zip(
        to_process_img, chinese_invoice, original_sizes):
    # Add the batch dimension the generator expects.
    prediction = generator_g(image[np.newaxis, ...], training=False)
    denoised = np.array(tensor_to_image(prediction))
    # cv2.resize expects the target size as (width, height).
    denoised = cv2.resize(denoised, (orig_width, orig_height))
    # The inputs were read with cv2.imread (BGR order) while PIL interprets
    # arrays as RGB; convert so red and blue are not swapped in the saved file.
    denoised = cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB)
    Image.fromarray(denoised).save(os.path.join(output_dir, filename))

0 comments on commit 7067a3e

Please sign in to comment.