From 0659b3c5d72c66a8254208dff616737974a6dda3 Mon Sep 17 00:00:00 2001
From: sohaibanwaar
Date: Wed, 26 Jul 2023 10:52:52 +0500
Subject: [PATCH] add: adding pcd support

---
 .gitignore                          |  17 ++
 build/lib/segment_lidar/samlidar.py | 428 ++++++++++++++++++++++++++++
 requirements.txt                    |   1 +
 segment_lidar/pcd.py                | 157 ++++++++++
 segment_lidar/samlidar.py           | 250 +++++++++++-----
 5 files changed, 775 insertions(+), 78 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 build/lib/segment_lidar/samlidar.py
 create mode 100644 segment_lidar/pcd.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cb32794
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+.flake8
+labeled.tif
+raster.tif
+segmented.las
+segmented1.las
+test.ipynb
+.ipynb_checkpoints/test-checkpoint.ipynb
+.vscode/settings.json
+__pycache__/pcd.cpython-39.pyc
+segment_lidar/__pycache__/__init__.cpython-39.pyc
+segment_lidar/__pycache__/pcd.cpython-39.pyc
+segment_lidar/__pycache__/samlidar.cpython-39.pyc
+segment_lidar.egg-info/dependency_links.txt
+segment_lidar.egg-info/PKG-INFO
+segment_lidar.egg-info/requires.txt
+segment_lidar.egg-info/SOURCES.txt
+segment_lidar.egg-info/top_level.txt
diff --git a/build/lib/segment_lidar/samlidar.py b/build/lib/segment_lidar/samlidar.py
new file mode 100644
index 0000000..f526bb4
--- /dev/null
+++ b/build/lib/segment_lidar/samlidar.py
@@ -0,0 +1,428 @@
+# Copyright (c) 2023 - University of Liège
+# Author : Anass Yarroudh (ayarroudh@uliege.be), Geomatics Unit of ULiege
+# This file is distributed under the BSD-3 licence. See LICENSE file for complete text of the license.
+
+import os
+import CSF
+import time
+import torch
+import numpy as np
+import supervision as sv
+from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+from samgeo import SamGeo
+from samgeo.text_sam import LangSAM
+from typing import List, Tuple
+import rasterio
+import laspy
+import cv2
+
+
+def cloud_to_image(points: np.ndarray, minx: float, maxx: float, miny: float, maxy: float, resolution: float) -> np.ndarray:
+    """
+    Converts a point cloud to an image.
+
+    :param points: An array of points in the cloud, where each row represents a point.
+                   The array shape can be (N, 3) or (N, 6).
+                   If the shape is (N, 3), each point is assumed to have white color (255, 255, 255).
+                   If the shape is (N, 6), the last three columns represent the RGB color values for each point.
+    :type points: ndarray
+    :param minx: The minimum x-coordinate value of the cloud bounding box.
+    :type minx: float
+    :param maxx: The maximum x-coordinate value of the cloud bounding box.
+    :type maxx: float
+    :param miny: The minimum y-coordinate value of the cloud bounding box.
+    :type miny: float
+    :param maxy: The maximum y-coordinate value of the cloud bounding box.
+    :type maxy: float
+    :param resolution: The resolution of the image in units per pixel.
+    :type resolution: float
+    :return: An image array representing the point cloud, where each pixel contains the RGB color values
+             of the corresponding point in the cloud.
+    :rtype: ndarray
+    :raises ValueError: If the shape of the points array is not valid or if any parameter is invalid.
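+
+    Example (an illustrative sketch; the two points and the 0.5 units/pixel
+    resolution are arbitrary values chosen so the expected output shape is
+    easy to verify)::
+
+        >>> pts = np.array([[0.0, 0.0, 1.0], [1.0, 1.0, 2.0]])
+        >>> img = cloud_to_image(pts, minx=0.0, maxx=1.0, miny=0.0, maxy=1.0, resolution=0.5)
+        >>> img.shape
+        (3, 3, 3)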
+    """
+    if points.shape[1] == 3:
+        colors = np.array([255, 255, 255])
+    else:
+        colors = points[:, -3:]
+
+    x = (points[:, 0] - minx) / resolution
+    y = (maxy - points[:, 1]) / resolution
+    pixel_x = np.floor(x).astype(int)
+    pixel_y = np.floor(y).astype(int)
+
+    width = int((maxx - minx) / resolution) + 1
+    height = int((maxy - miny) / resolution) + 1
+
+    image = np.zeros((height, width, 3), dtype=np.uint8)
+    image[pixel_y, pixel_x] = colors
+
+    return image
+
+
+def image_to_cloud(points: np.ndarray, minx: float, maxy: float, image: np.ndarray, resolution: float) -> List[int]:
+    """
+    Converts an image to a point cloud with segment IDs.
+
+    :param points: An array of points representing the cloud, where each row represents a point.
+                   The array shape is (N, 3) where each point contains x, y, and z coordinates.
+    :param minx: The minimum x-coordinate value of the cloud bounding box.
+    :param maxy: The maximum y-coordinate value of the cloud bounding box.
+    :param image: The image array representing the input image.
+    :param resolution: The resolution of the image in units per pixel.
+    :return: A list of segment IDs for each point in the cloud. The segment ID represents the color segment
+             in the input image that the corresponding point belongs to.
+    :rtype: List[int]
+    """
+    unique_values = {}
+    image = np.asarray(image)
+
+    # Calculate pixel coordinates for all points
+    x = (points[:, 0] - minx) / resolution
+    y = (maxy - points[:, 1]) / resolution
+    pixel_x = np.floor(x).astype(int)
+    pixel_y = np.floor(y).astype(int)
+
+    # Mask points outside image bounds
+    out_of_bounds = (pixel_x < 0) | (pixel_x >= image.shape[1]) | (pixel_y < 0) | (pixel_y >= image.shape[0])
+
+    # Keep segment IDs aligned with the input points: out-of-bounds points get -1
+    segment_ids = np.full(points.shape[0], -1, dtype=int)
+
+    # Extract pixel values for in-bounds points
+    valid_points = ~out_of_bounds
+    values = image[pixel_y[valid_points], pixel_x[valid_points]]
+
+    # Map pixel values to unique segment IDs (tuples for multi-channel images,
+    # scalars otherwise, so the dictionary keys are always hashable)
+    ids = []
+    for value in values:
+        key = value.item() if np.ndim(value) == 0 else tuple(value)
+        if key not in unique_values:
+            unique_values[key] = len(unique_values)
+        ids.append(unique_values[key])
+    segment_ids[valid_points] = ids
+
+    return segment_ids.tolist()
+
+
+class mask:
+    def __init__(self, crop_n_layers: int = 1, crop_n_points_downscale_factor: int = 1, min_mask_region_area: int = 200, points_per_side: int = 5, pred_iou_thresh: float = 0.90, stability_score_thresh: float = 0.92):
+        """
+        Initializes an instance of the mask class.
+
+        :param crop_n_layers: The number of layers to crop from the top of the image, defaults to 1.
+        :type crop_n_layers: int
+        :param crop_n_points_downscale_factor: The downscale factor for the number of points in the cropped image, defaults to 1.
+        :type crop_n_points_downscale_factor: int
+        :param min_mask_region_area: The minimum area of a mask region, defaults to 200.
+        :type min_mask_region_area: int
+        :param points_per_side: The number of points per side of the mask region, defaults to 5.
+        :type points_per_side: int
+        :param pred_iou_thresh: The IoU threshold for the predicted mask region, defaults to 0.90.
+        :type pred_iou_thresh: float
+        :param stability_score_thresh: The stability score threshold for the predicted mask region, defaults to 0.92.
+        :type stability_score_thresh: float
+        """
+        self.crop_n_layers = crop_n_layers
+        self.crop_n_points_downscale_factor = crop_n_points_downscale_factor
+        self.min_mask_region_area = min_mask_region_area
+        self.points_per_side = points_per_side
+        self.pred_iou_thresh = pred_iou_thresh
+        self.stability_score_thresh = stability_score_thresh
+
+
+class SamLidar:
+    def __init__(self, ckpt_path: str, algorithm: str = 'segment-geospatial', model_type: str = 'vit_h', resolution: float = 0.25, device: str = 'cuda:0', sam_kwargs: bool = False) -> None:
+        """
+        Initializes an instance of the SamLidar class.
+
+        :param algorithm: The algorithm to use, defaults to 'segment-geospatial'.
+        :type algorithm: str
+        :param model_type: The type of the model, defaults to 'vit_h'.
+        :type model_type: str
+        :param ckpt_path: The path to the model checkpoint.
+        :type ckpt_path: str
+        :param resolution: The resolution value, defaults to 0.25.
+        :type resolution: float
+        :param device: The device to use, defaults to 'cuda:0'.
+        :type device: str
+        :param sam_kwargs: Whether to use the SAM kwargs when using 'segment-geospatial' as algorithm, defaults to False.
+        :type sam_kwargs: bool
+        """
+        self.algorithm = algorithm
+        self.model_type = model_type
+        self.ckpt_path = ckpt_path
+        self.resolution = resolution
+        self.device = torch.device('cuda:0') if device == 'cuda:0' and torch.cuda.is_available() else torch.device('cpu')
+        self.mask = mask()
+
+        if sam_kwargs or algorithm == 'segment-anything':
+            self.mask_generator = SamAutomaticMaskGenerator(
+                model=sam_model_registry[model_type](checkpoint=ckpt_path).to(device=self.device),
+                crop_n_layers=self.mask.crop_n_layers,
+                crop_n_points_downscale_factor=self.mask.crop_n_points_downscale_factor,
+                min_mask_region_area=self.mask.min_mask_region_area,
+                points_per_side=self.mask.points_per_side,
+                pred_iou_thresh=self.mask.pred_iou_thresh,
+                stability_score_thresh=self.mask.stability_score_thresh
+            )
+
+        if sam_kwargs:
+            self.sam_kwargs = {
+                'crop_n_layers': self.mask.crop_n_layers,
+                'crop_n_points_downscale_factor': self.mask.crop_n_points_downscale_factor,
+                'min_mask_region_area': self.mask.min_mask_region_area,
+                'points_per_side': self.mask.points_per_side,
+                'pred_iou_thresh': self.mask.pred_iou_thresh,
+                'stability_score_thresh': self.mask.stability_score_thresh
+            }
+        else:
+            self.sam_kwargs = None
+
+        self.sam_geo = SamGeo(
+            model_type=self.model_type,
+            checkpoint=self.ckpt_path,
+            device=self.device,
+            sam_kwargs=self.sam_kwargs
+        )
+
+        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
+
+
+
+    def read(self, path: str, classification: int = None) -> np.ndarray:
+        """
+        Reads a point cloud from a file and returns it as a NumPy array.
+
+        :param path: The path to the input file.
+        :type path: str
+        :param classification: The optional classification value to filter the point cloud, defaults to None.
+        :type classification: int, optional
+        :return: The point cloud as a NumPy array.
+        :rtype: np.ndarray
+        :raises ValueError: If the input file format is not supported.
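+
+        Example (an illustrative sketch; 'sam_vit_h_4b8939.pth' and
+        'pointcloud.las' are placeholder paths, not files shipped with this
+        repository)::
+
+            >>> model = SamLidar(ckpt_path='sam_vit_h_4b8939.pth')
+            >>> points = model.read('pointcloud.las')
+            >>> points.shape[1] in (3, 6)  # XYZ, or XYZRGB when colors are present
+            True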
+        """
+        start = time.time()
+        extension = os.path.splitext(path)[1]
+        try:
+            if extension not in ['.laz', '.las', '.npy']:
+                raise ValueError(f'The input file format {extension} is not supported.\nThe file format should be [.las/.laz].')
+        except ValueError as error:
+            message = str(error)
+            lines = message.split('\n')
+            print(lines[-2])
+            print(lines[-1])
+            exit()
+
+        print(f'Reading {path}...')
+
+        if extension == '.npy':
+            points = np.load(path)
+        elif extension in ['.laz', '.las']:
+            las = laspy.read(path)
+            if classification == None:
+                print(f'- Classification value is not provided. Reading all points...')
+                pcd = las.points
+            else:
+                try:
+                    if hasattr(las, 'classification'):
+                        print(f'- Reading points with classification value {classification}...')
+                        pcd = las.points[las.raw_classification == classification]
+                    else:
+                        raise ValueError(f'The input file does not contain classification values.')
+                except ValueError as error:
+                    message = str(error)
+                    lines = message.split('\n')
+                    print(lines[-2])
+                    print(lines[-1])
+                    exit()
+
+            if hasattr(las, 'red') and hasattr(las, 'green') and hasattr(las, 'blue'):
+                print(f'- Reading RGB values...')
+                points = np.vstack((pcd.x, pcd.y, pcd.z, pcd.red / 255.0, pcd.green / 255.0, pcd.blue / 255.0)).transpose()
+            else:
+                print(f'- RGB values are not provided. Reading only XYZ values...')
+                points = np.vstack((pcd.x, pcd.y, pcd.z)).transpose()
+
+        end = time.time()
+        print(f'File reading is completed in {end - start:.2f} seconds. The point cloud contains {points.shape[0]} points.\n')
+        return points
+
+
+    def csf(self, points: np.ndarray, class_threshold: float = 0.5, cloth_resolution: float = 0.2, iterations: int = 500, slope_smooth: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Applies the CSF (Cloth Simulation Filter) algorithm to filter ground points in a point cloud.
+
+        :param points: The input point cloud as a NumPy array, where each row represents a point with x, y, z coordinates.
+        :type points: np.ndarray
+        :param class_threshold: The threshold value for classifying points as ground/non-ground, defaults to 0.5.
+        :type class_threshold: float, optional
+        :param cloth_resolution: The resolution value for cloth simulation, defaults to 0.2.
+        :type cloth_resolution: float, optional
+        :param iterations: The number of iterations for the CSF algorithm, defaults to 500.
+        :type iterations: int, optional
+        :param slope_smooth: A boolean indicating whether to enable slope smoothing, defaults to False.
+        :type slope_smooth: bool, optional
+        :return: A tuple containing three arrays: the filtered point cloud, non-ground (filtered) points indices and ground points indices.
+        :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
+        """
+        start = time.time()
+        print(f'Applying CSF algorithm...')
+        csf = CSF.CSF()
+        csf.params.bSloopSmooth = slope_smooth
+        csf.params.cloth_resolution = cloth_resolution
+        csf.params.interations = iterations  # 'interations' is the attribute name exposed by the CSF package
+        csf.params.class_threshold = class_threshold
+        csf.setPointCloud(points[:, :3])
+        ground = CSF.VecInt()
+        non_ground = CSF.VecInt()
+        csf.do_filtering(ground, non_ground)
+
+        points = points[non_ground, :]
+        os.remove('cloth_nodes.txt')
+
+        end = time.time()
+        print(f'CSF algorithm is completed in {end - start:.2f} seconds. The filtered non-ground cloud contains {points.shape[0]} points.\n')
+
+        return points, np.asarray(non_ground), np.asarray(ground)
+
+
+    def segment(self, points: np.ndarray, text_prompt: str = None, image_path: str = 'raster.tif', labels_path: str = 'labeled.tif') -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Segments a point cloud based on the provided parameters and returns the segment IDs, segmented image, and RGB image.
+
+        :param points: The point cloud data as a NumPy array.
+        :type points: np.ndarray
+        :param text_prompt: Optional text prompt for segment generation, defaults to None.
+        :type text_prompt: str
+        :param image_path: Path to the input raster image, defaults to 'raster.tif'.
+        :type image_path: str
+        :param labels_path: Path to save the labeled output image, defaults to 'labeled.tif'.
+        :type labels_path: str
+        :return: A tuple containing the segment IDs, segmented image, and RGB image.
+        :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
+        """
+        start = time.time()
+        print(f'Segmenting the point cloud...')
+        minx = np.min(points[:, 0])
+        maxx = np.max(points[:, 0])
+        miny = np.min(points[:, 1])
+        maxy = np.max(points[:, 1])
+
+        print(f'- Generating raster image...')
+        image = cloud_to_image(points, minx, maxx, miny, maxy, self.resolution)
+        image = np.asarray(image).astype(np.uint8)
+
+        print(f'- Saving raster image...')
+        with rasterio.open(
+            image_path,
+            'w',
+            driver='GTiff',
+            width=image.shape[1],
+            height=image.shape[0],
+            count=3,
+            dtype=image.dtype
+        ) as dst:
+            for i in range(3):
+                dst.write(image[:, :, i], i + 1)
+
+        with rasterio.open(image_path, 'r') as src:
+            image_rgb = src.read()
+
+        print(f'- Applying {self.algorithm} to raster image...')
+        if self.algorithm == 'segment-anything':
+            sam = sam_model_registry[self.model_type](checkpoint=self.ckpt_path)
+            sam.to(self.device)
+
+            image_rgb = image_rgb.transpose(1, 2, 0)
+            result = self.mask_generator.generate(image_rgb)
+            mask_annotator = sv.MaskAnnotator()
+            detections = sv.Detections.from_sam(result)
+            num_masks, height, width = detections.mask.shape
+            segmented_image = np.zeros((height, width), dtype=np.uint8)
+            for i in range(num_masks):
+                mask = detections.mask[i]
+                segmented_image[mask] = i
+
+            print(f'- Saving segmented image...')
+            with rasterio.open(
+                labels_path,
+                'w',
+                driver='GTiff',
+                width=segmented_image.shape[1],
+                height=segmented_image.shape[0],
+                count=1,
+                dtype=segmented_image.dtype
+            ) as dst:
+                dst.write(segmented_image, 1)
+
+
+        elif self.algorithm == 'segment-geospatial':
+            if text_prompt is not None:
+                print(f'- Generating labels using text prompt...')
+                sam = LangSAM()
+                sam.predict(image=image_path, text_prompt=text_prompt, box_threshold=0.24, text_threshold=0.3, output=labels_path)
+                print(f'- Saving segmented image...')
+            else:
+                sam = self.sam_geo
+                sam.generate(source=image_path, output=labels_path, erosion_kernel=(3, 3), foreground=True, unique=True)
+                print(f'- Saving segmented image...')
+
+        with rasterio.open(labels_path, 'r') as src:
+            segmented_image = src.read()
+            segmented_image = np.squeeze(segmented_image)
+
+        print(f'- Generating segment IDs...')
+        segment_ids = image_to_cloud(points, minx, maxy, segmented_image, self.resolution)
+        end = time.time()
+
+        print(f'Segmentation is completed in {end - start:.2f} seconds. Number of instances: {np.max(segmented_image)}\n')
+        return segment_ids, segmented_image, image_rgb
+
+
+    def write(self, points: np.ndarray, segment_ids: np.ndarray, non_ground: np.ndarray = None, ground: np.ndarray = None, save_path: str = 'segmented.las') -> None:
+        """
+        Writes the segmented point cloud data to a LAS/LAZ file.
+
+        :param points: The input point cloud data as a NumPy array, where each row represents a point with x, y, z coordinates.
+        :type points: np.ndarray
+        :param segment_ids: The segment IDs corresponding to each point in the point cloud.
+        :type segment_ids: np.ndarray
+        :param non_ground: Optional array of indices for non-ground points in the original point cloud (default: None).
+        :type non_ground: np.ndarray, optional
+        :param ground: Optional array of indices for ground points in the original point cloud (default: None).
+        :type ground: np.ndarray, optional
+        :param save_path: The path to save the segmented LAS/LAZ file (default: 'segmented.las').
+        :type save_path: str, optional
+        :return: None
+        """
+        start = time.time()
+        extension = os.path.splitext(save_path)[1]
+        try:
+            if extension not in ['.laz', '.las']:
+                raise ValueError(f'The input file format {extension} is not supported.\nThe file format should be [.las/.laz].')
+        except ValueError as error:
+            message = str(error)
+            lines = message.split('\n')
+            print(lines[-2])
+            print(lines[-1])
+            exit()
+
+        print(f'Writing the segmented point cloud to {save_path}...')
+
+        header = laspy.LasHeader(point_format=3, version="1.3")
+        lidar = laspy.LasData(header=header)
+
+        if ground is not None:
+            indices = np.concatenate((non_ground, ground))
+            lidar.xyz = points[indices]
+            segment_ids = np.append(segment_ids, np.full(len(ground), -1))
+            lidar.add_extra_dim(laspy.ExtraBytesParams(name="segment_id", type=np.int32))
+            lidar.segment_id = segment_ids
+        else:
+            lidar.xyz = points
+            lidar.add_extra_dim(laspy.ExtraBytesParams(name="segment_id", type=np.int32))
+            lidar.segment_id = segment_ids
+
+        lidar.write(save_path)
+
+        end = time.time()
+        print(f'Writing is completed in {end - start:.2f} seconds.\n')
+        return None
diff --git a/requirements.txt b/requirements.txt
index c880cd9..bc4260c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ segment-geospatial==0.8.1
 rasterio==1.3.7
 laspy==2.4.1
 laszip==0.2.3
+pandas
\ No newline at end of file
diff --git a/segment_lidar/pcd.py b/segment_lidar/pcd.py
new file mode 100644
index 0000000..857bd73
--- /dev/null
+++ b/segment_lidar/pcd.py
@@ -0,0 +1,157 @@
+import re
+import struct
+import warnings
+
+import numpy as np
+import pandas as pd
+
+numpy_pcd_type_mappings = [(np.dtype('float32'), ('F', 4)),
+                           (np.dtype('float64'), ('F', 8)),
+                           (np.dtype('uint8'), ('U', 1)),
+                           (np.dtype('uint16'), ('U', 2)),
+                           (np.dtype('uint32'), ('U', 4)),
+                           (np.dtype('uint64'), ('U', 8)),
+                           (np.dtype('int16'), ('I', 2)),
+                           (np.dtype('int32'), ('I', 4)),
+                           (np.dtype('int64'), ('I', 8))]
+numpy_type_to_pcd_type = dict(numpy_pcd_type_mappings)
+pcd_type_to_numpy_type = dict((q, p) for (p, q) in numpy_pcd_type_mappings)
+
+
+def parse_header(lines):
+    metadata = {}
+    for ln in lines:
+        if ln.startswith('#') or len(ln) < 2:
+            continue
+        match = re.match(r'(\w+)\s+([\w\s\.]+)', ln)
+        if not match:
+            warnings.warn("warning: can't understand line: %s" % ln)
+            continue
+        key, value = match.group(1).lower(), match.group(2)
+        if key == 'version':
+            metadata[key] = value
+        elif key in ('fields', 'type'):
+            metadata[key] = value.split()
+        elif key in ('size', 'count'):
+            metadata[key] = list(map(int, value.split()))
+        elif key in ('width', 'height', 'points'):
+            metadata[key] = int(value)
+        elif key == 'viewpoint':
+            metadata[key] = list(map(float, value.split()))
+        elif key == 'data':
+            metadata[key] = value.strip().lower()
+    # TODO apparently count is not required?
+    # add some reasonable defaults
+    if 'count' not in metadata:
+        metadata['count'] = [1] * len(metadata['fields'])
+    if 'viewpoint' not in metadata:
+        metadata['viewpoint'] = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
+    if 'version' not in metadata:
+        metadata['version'] = '.7'
+    return metadata
+
+
+def build_dtype(metadata):
+    """ build numpy structured array dtype from pcl metadata.
+    note that fields with count > 1 are 'flattened' by creating multiple
+    single-count fields.
+    TODO: allow 'proper' multi-count fields.
+    """
+    fieldnames = []
+    typenames = []
+    for f, c, t, s in zip(metadata['fields'],
+                          metadata['count'],
+                          metadata['type'],
+                          metadata['size']):
+        np_type = pcd_type_to_numpy_type[(t, s)]
+        if c == 1:
+            fieldnames.append(f)
+            typenames.append(np_type)
+        else:
+            fieldnames.extend(['%s_%04d' % (f, i) for i in range(c)])
+            typenames.extend([np_type] * c)
+
+    dtype = np.dtype(list(zip(fieldnames, typenames)))
+    return dtype
+
+
+def read_pcd(filename):
+    """ Reads a pcd file and returns the elements as pandas DataFrames.
+
+    Parameters
+    ----------
+    filename: str
+        Path to the pcd file.
+
+    Returns
+    -------
+    dict with a 'points' key holding a pandas DataFrame.
+
+    """
+    data = {}
+    with open(filename, 'rb') as f:
+        header = []
+        while True:
+            ln = f.readline().strip().decode()
+            header.append(ln)
+            if ln.startswith('DATA'):
+                metadata = parse_header(header)
+                dtype = build_dtype(metadata)
+                break
+
+        if metadata['data'] == 'ascii':
+            pc_data = np.loadtxt(f, dtype=dtype, delimiter=' ')
+
+        elif metadata['data'] == 'binary':
+            rowstep = metadata['points'] * dtype.itemsize
+            # for some reason pcl adds empty space at the end of files
+            buf = f.read(rowstep)
+
+            pc_data = np.frombuffer(buf, dtype=dtype)
+
+        elif metadata['data'] == 'binary_compressed':
+            raise NotImplementedError("Go ask PCD why they use lzf compression.")
+            # compressed size of data (uint32)
+            # uncompressed size of data (uint32)
+            # compressed data
+            # junk
+            fmt = 'II'
+            compressed_size, uncompressed_size =\
+                struct.unpack(fmt, f.read(struct.calcsize(fmt)))
+            compressed_data = f.read(compressed_size)
+            # TODO what to use as second argument? if buf is None
+            # (compressed > uncompressed)
+            # should we read buf as raw binary?
+            # NOTE: unreachable until lzf decompression support is added
+            #buf = lzf.decompress(compressed_data, uncompressed_size)
+            if len(buf) != uncompressed_size:
+                raise Exception('Error decompressing data')
+            # the data is stored field-by-field
+            pc_data = np.zeros(metadata['width'], dtype=dtype)
+            ix = 0
+            for dti in range(len(dtype)):
+                dt = dtype[dti]
+                bytes = dt.itemsize * metadata['width']
+                column = np.frombuffer(buf[ix:(ix + bytes)], dt)
+                pc_data[dtype.names[dti]] = column
+                ix += bytes
+
+    df = pd.DataFrame(pc_data)
+
+    # check if dataframe contains color info
+    col = 'rgb'
+    if col in df.columns:
+        # get the 'rgb' column from dataframe
+        packed_rgb = df.rgb.values
+        # 'rgb' values are stored as float; reinterpret their bytes as int
+        packed_rgb = packed_rgb.astype(np.float32).tobytes()
+        packed_rgb = np.frombuffer(packed_rgb, dtype=np.int32)
+        # unpack 'rgb' into 'red', 'green' and 'blue' channel
+        df['red'] = np.asarray((packed_rgb >> 16) & 255, dtype=np.uint8)
+        df['green'] = np.asarray((packed_rgb >> 8) & 255, dtype=np.uint8)
+        df['blue'] = np.asarray(packed_rgb & 255, dtype=np.uint8)
+        # remove packed rgb since we don't need it anymore
+        df.drop(col, axis=1, inplace=True)
+
+    data['points'] = df
+    return data
diff --git a/segment_lidar/samlidar.py b/segment_lidar/samlidar.py
index f526bb4..ae0ceb4 100644
--- a/segment_lidar/samlidar.py
+++ b/segment_lidar/samlidar.py
@@ -14,10 +14,17 @@ from typing import List, Tuple
 import rasterio
 import laspy
-import cv2
+from segment_lidar.pcd import read_pcd
 
 
-def cloud_to_image(points: np.ndarray, minx: float, maxx: float, miny: float, maxy: float, resolution: float) -> np.ndarray:
+def cloud_to_image(
+    points: np.ndarray,
+    minx: float,
+    maxx: float,
+    miny: float,
+    maxy: float,
+    resolution: float,
+) -> np.ndarray:
     """
     Converts a point cloud to an image.
 
@@ -60,7 +67,9 @@ def cloud_to_image(points: np.ndarray, minx: float, maxx: float, miny: float, ma
     return image
 
 
-def image_to_cloud(points: np.ndarray, minx: float, maxy: float, image: np.ndarray, resolution: float) -> List[int]:
+def image_to_cloud(
+    points: np.ndarray, minx: float, maxy: float, image: np.ndarray, resolution: float
+) -> List[int]:
     """
     Converts an image to a point cloud with segment IDs.
 
@@ -85,7 +94,12 @@ def image_to_cloud(points: np.ndarray, minx: float, maxy: float, image: np.ndarr
     pixel_y = np.floor(y).astype(int)
 
     # Mask points outside image bounds
-    out_of_bounds = (pixel_x < 0) | (pixel_x >= image.shape[1]) | (pixel_y < 0) | (pixel_y >= image.shape[0])
+    out_of_bounds = (
+        (pixel_x < 0)
+        | (pixel_x >= image.shape[1])
+        | (pixel_y < 0)
+        | (pixel_y >= image.shape[0])
+    )
 
     # Keep segment IDs aligned with the input points: out-of-bounds points get -1
     segment_ids = np.full(points.shape[0], -1, dtype=int)
@@ -103,7 +117,15 @@ def image_to_cloud(points: np.ndarray, minx: float, maxy: float, image: np.ndarr
 
 
 class mask:
-    def __init__(self, crop_n_layers: int = 1, crop_n_points_downscale_factor: int = 1, min_mask_region_area: int = 200, points_per_side: int = 5, pred_iou_thresh: float = 0.90, stability_score_thresh: float = 0.92):
+    def __init__(
+        self,
+        crop_n_layers: int = 1,
+        crop_n_points_downscale_factor: int = 1,
+        min_mask_region_area: int = 200,
+        points_per_side: int = 5,
+        pred_iou_thresh: float = 0.90,
+        stability_score_thresh: float = 0.92,
+    ):
         """
         Initializes an instance of the mask class.
 
@@ -129,7 +151,15 @@ def __init__(self, crop_n_layers: int = 1, crop_n_points_downscale_factor: int =
 
 
 class SamLidar:
-    def __init__(self, ckpt_path: str, algorithm: str = 'segment-geospatial', model_type: str = 'vit_h', resolution: float = 0.25, device: str = 'cuda:0', sam_kwargs: bool = False) -> None:
+    def __init__(
+        self,
+        ckpt_path: str,
+        algorithm: str = "segment-geospatial",
+        model_type: str = "vit_h",
+        resolution: float = 0.25,
+        device: str = "cuda:0",
+        sam_kwargs: bool = False,
+    ) -> None:
         """
         Initializes an instance of the SamLidar class.
 
@@ -150,28 +180,34 @@ def __init__(self, ckpt_path: str, algorithm: str = 'segment-geospatial', model_
         self.model_type = model_type
         self.ckpt_path = ckpt_path
         self.resolution = resolution
-        self.device = torch.device('cuda:0') if device == 'cuda:0' and torch.cuda.is_available() else torch.device('cpu')
+        self.device = (
+            torch.device("cuda:0")
+            if device == "cuda:0" and torch.cuda.is_available()
+            else torch.device("cpu")
+        )
         self.mask = mask()
 
-        if sam_kwargs or algorithm == 'segment-anything':
+        if sam_kwargs or algorithm == "segment-anything":
             self.mask_generator = SamAutomaticMaskGenerator(
-                model=sam_model_registry[model_type](checkpoint=ckpt_path).to(device=self.device),
+                model=sam_model_registry[model_type](checkpoint=ckpt_path).to(
+                    device=self.device
+                ),
                 crop_n_layers=self.mask.crop_n_layers,
                 crop_n_points_downscale_factor=self.mask.crop_n_points_downscale_factor,
                 min_mask_region_area=self.mask.min_mask_region_area,
                 points_per_side=self.mask.points_per_side,
                 pred_iou_thresh=self.mask.pred_iou_thresh,
-                stability_score_thresh=self.mask.stability_score_thresh
+                stability_score_thresh=self.mask.stability_score_thresh,
             )
 
         if sam_kwargs:
             self.sam_kwargs = {
-                'crop_n_layers': self.mask.crop_n_layers,
-                'crop_n_points_downscale_factor': self.mask.crop_n_points_downscale_factor,
-                'min_mask_region_area': self.mask.min_mask_region_area,
-                'points_per_side': self.mask.points_per_side,
-                'pred_iou_thresh': self.mask.pred_iou_thresh,
-                'stability_score_thresh': self.mask.stability_score_thresh
+                "crop_n_layers": self.mask.crop_n_layers,
+                "crop_n_points_downscale_factor": self.mask.crop_n_points_downscale_factor,
+                "min_mask_region_area": self.mask.min_mask_region_area,
+                "points_per_side": self.mask.points_per_side,
+                "pred_iou_thresh": self.mask.pred_iou_thresh,
+                "stability_score_thresh": self.mask.stability_score_thresh,
             }
         else:
             self.sam_kwargs = None
@@ -180,12 +216,10 @@ def __init__(self, ckpt_path: str, algorithm: str = 'segment-geospatial', model_
             model_type=self.model_type,
             checkpoint=self.ckpt_path,
             device=self.device,
-            sam_kwargs=self.sam_kwargs
+            sam_kwargs=self.sam_kwargs,
         )
 
-        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
-
-
+        os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
 
     def read(self, path: str, classification: int = None) -> np.ndarray:
         """
@@ -202,51 +236,77 @@ def read(self, path: str, classification: int = None) -> np.ndarray:
         start = time.time()
         extension = os.path.splitext(path)[1]
         try:
-            if extension not in ['.laz', '.las', '.npy']:
-                raise ValueError(f'The input file format {extension} is not supported.\nThe file format should be [.las/.laz].')
+            if extension not in [".laz", ".las", ".npy", ".pcd"]:
+                raise ValueError(
+                    f"The input file format {extension} is not supported.\nThe file format should be [.las/.laz/.npy/.pcd]."
+                )
         except ValueError as error:
             message = str(error)
-            lines = message.split('\n')
+            lines = message.split("\n")
             print(lines[-2])
             print(lines[-1])
             exit()
 
-        print(f'Reading {path}...')
+        print(f"Reading {path}...")
 
-        if extension == '.npy':
+        if extension == ".npy":
             points = np.load(path)
-        elif extension in ['.laz', '.las']:
+        elif extension in [".laz", ".las"]:
             las = laspy.read(path)
-            if classification == None:
-                print(f'- Classification value is not provided. Reading all points...')
+            if classification is None:
+                print("- Classification value is not provided. Reading all points...")
                 pcd = las.points
             else:
                 try:
-                    if hasattr(las, 'classification'):
-                        print(f'- Reading points with classification value {classification}...')
+                    if hasattr(las, "classification"):
+                        print(
+                            f"- Reading points with classification value {classification}..."
+                        )
                         pcd = las.points[las.raw_classification == classification]
                     else:
-                        raise ValueError(f'The input file does not contain classification values.')
+                        raise ValueError(
+                            "The input file does not contain classification values."
+                        )
                 except ValueError as error:
                     message = str(error)
-                    lines = message.split('\n')
+                    lines = message.split("\n")
                     print(lines[-2])
                     print(lines[-1])
                     exit()
 
-            if hasattr(las, 'red') and hasattr(las, 'green') and hasattr(las, 'blue'):
-                print(f'- Reading RGB values...')
-                points = np.vstack((pcd.x, pcd.y, pcd.z, pcd.red / 255.0, pcd.green / 255.0, pcd.blue / 255.0)).transpose()
+            if hasattr(las, "red") and hasattr(las, "green") and hasattr(las, "blue"):
+                print("- Reading RGB values...")
+                points = np.vstack(
+                    (
+                        pcd.x,
+                        pcd.y,
+                        pcd.z,
+                        pcd.red / 255.0,
+                        pcd.green / 255.0,
+                        pcd.blue / 255.0,
+                    )
+                ).transpose()
             else:
-                print(f'- RGB values are not provided. Reading only XYZ values...')
+                print("- RGB values are not provided. Reading only XYZ values...")
                 points = np.vstack((pcd.x, pcd.y, pcd.z)).transpose()
+        elif extension in [".pcd"]:
+            pcd_file = read_pcd(path)
+            points = pcd_file["points"][["x", "y", "z"]].values
 
         end = time.time()
-        print(f'File reading is completed in {end - start:.2f} seconds. The point cloud contains {points.shape[0]} points.\n')
+        print(
+            f"File reading is completed in {end - start:.2f} seconds. The point cloud contains {points.shape[0]} points.\n"
+        )
         return points
 
-
-    def csf(self, points: np.ndarray, class_threshold: float = 0.5, cloth_resolution: float = 0.2, iterations: int = 500, slope_smooth: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    def csf(
+        self,
+        points: np.ndarray,
+        class_threshold: float = 0.5,
+        cloth_resolution: float = 0.2,
+        iterations: int = 500,
+        slope_smooth: bool = False,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Applies the CSF (Cloth Simulation Filter) algorithm to filter ground points in a point cloud.
 
@@ -264,7 +324,7 @@ def csf(self, points: np.ndarray, class_threshold: float = 0.5, cloth_resolution
         :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
         """
         start = time.time()
-        print(f'Applying CSF algorithm...')
+        print("Applying CSF algorithm...")
         csf = CSF.CSF()
         csf.params.bSloopSmooth = slope_smooth
         csf.params.cloth_resolution = cloth_resolution
@@ -276,15 +336,22 @@ def csf(self, points: np.ndarray, class_threshold: float = 0.5, cloth_resolution
         csf.do_filtering(ground, non_ground)
 
         points = points[non_ground, :]
-        os.remove('cloth_nodes.txt')
+        os.remove("cloth_nodes.txt")
 
         end = time.time()
-        print(f'CSF algorithm is completed in {end - start:.2f} seconds. The filtered non-ground cloud contains {points.shape[0]} points.\n')
+        print(
+            f"CSF algorithm is completed in {end - start:.2f} seconds. The filtered non-ground cloud contains {points.shape[0]} points.\n"
+        )
 
         return points, np.asarray(non_ground), np.asarray(ground)
 
-
-    def segment(self, points: np.ndarray, text_prompt: str = None, image_path: str = 'raster.tif', labels_path: str = 'labeled.tif') -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    def segment(
+        self,
+        points: np.ndarray,
+        text_prompt: str = None,
+        image_path: str = "raster.tif",
+        labels_path: str = "labeled.tif",
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Segments a point cloud based on the provided parameters and returns the segment IDs, segmented image, and RGB image.
 
@@ -300,34 +367,34 @@ def segment(self, points: np.ndarray, text_prompt: str = None, image_path: str =
         :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
         """
         start = time.time()
-        print(f'Segmenting the point cloud...')
+        print(f"Segmenting the point cloud...")
         minx = np.min(points[:, 0])
         maxx = np.max(points[:, 0])
         miny = np.min(points[:, 1])
         maxy = np.max(points[:, 1])
 
-        print(f'- Generating raster image...')
+        print(f"- Generating raster image...")
         image = cloud_to_image(points, minx, maxx, miny, maxy, self.resolution)
         image = np.asarray(image).astype(np.uint8)
 
-        print(f'- Saving raster image...')
+        print(f"- Saving raster image...")
         with rasterio.open(
             image_path,
-            'w',
-            driver='GTiff',
+            "w",
+            driver="GTiff",
             width=image.shape[1],
             height=image.shape[0],
             count=3,
-            dtype=image.dtype
+            dtype=image.dtype,
         ) as dst:
             for i in range(3):
                 dst.write(image[:, :, i], i + 1)
 
-        with rasterio.open(image_path, 'r') as src:
+        with rasterio.open(image_path, "r") as src:
            image_rgb = src.read()
 
-        print(f'- Applying {self.algorithm} to raster image...')
-        if self.algorithm == 'segment-anything':
+        print(f"- Applying {self.algorithm} to raster image...")
+        if self.algorithm == "segment-anything":
             sam = sam_model_registry[self.model_type](checkpoint=self.ckpt_path)
             sam.to(self.device)
 
@@ -341,43 +408,64 @@ def segment(self, points: np.ndarray, text_prompt: str = None, image_path: str =
             mask = detections.mask[i]
             segmented_image[mask] = i
 
-            print(f'- Saving segmented image...')
+            print(f"- Saving segmented image...")
             with rasterio.open(
                 labels_path,
-                'w',
-                driver='GTiff',
+                "w",
+                driver="GTiff",
                 width=segmented_image.shape[1],
                 height=segmented_image.shape[0],
                 count=1,
-                dtype=segmented_image.dtype
+                dtype=segmented_image.dtype,
             ) as dst:
                 dst.write(segmented_image, 1)
 
-
-        elif self.algorithm == 'segment-geospatial':
+        elif self.algorithm == "segment-geospatial":
             if text_prompt is not None:
-                print(f'- Generating labels using text prompt...')
+                print(f"- Generating labels using text prompt...")
                 sam = LangSAM()
-                sam.predict(image=image_path, text_prompt=text_prompt, box_threshold=0.24, text_threshold=0.3, output=labels_path)
-                print(f'- Saving segmented image...')
+                sam.predict(
+                    image=image_path,
+                    text_prompt=text_prompt,
+                    box_threshold=0.24,
+                    text_threshold=0.3,
+                    output=labels_path,
+                )
+                print(f"- Saving segmented image...")
             else:
                 sam = self.sam_geo
-                sam.generate(source=image_path, output=labels_path, erosion_kernel=(3, 3), foreground=True, unique=True)
-                print(f'- Saving segmented image...')
-
-        with rasterio.open(labels_path, 'r') as src:
+                sam.generate(
+                    source=image_path,
+                    output=labels_path,
+                    erosion_kernel=(3, 3),
+                    foreground=True,
+                    unique=True,
+                )
+                print(f"- Saving segmented image...")
+
+        with rasterio.open(labels_path, "r") as src:
             segmented_image = src.read()
             segmented_image = np.squeeze(segmented_image)
 
-        print(f'- Generating segment IDs...')
-        segment_ids = image_to_cloud(points, minx, maxy, segmented_image, self.resolution)
+        print(f"- Generating segment IDs...")
+        segment_ids = image_to_cloud(
+            points, minx, maxy, segmented_image, self.resolution
+        )
         end = time.time()
 
-        print(f'Segmentation is completed in {end - start:.2f} seconds. Number of instances: {np.max(segmented_image)}\n')
+        print(
+            f"Segmentation is completed in {end - start:.2f} seconds. Number of instances: {np.max(segmented_image)}\n"
+        )
         return segment_ids, segmented_image, image_rgb
 
-
-    def write(self, points: np.ndarray, segment_ids: np.ndarray, non_ground: np.ndarray = None, ground: np.ndarray = None, save_path: str = 'segmented.las') -> None:
+    def write(
+        self,
+        points: np.ndarray,
+        segment_ids: np.ndarray,
+        non_ground: np.ndarray = None,
+        ground: np.ndarray = None,
+        save_path: str = "segmented.las",
+    ) -> None:
         """
         Writes the segmented point cloud data to a LAS/LAZ file.
 
@@ -396,16 +484,18 @@ def write(self, points: np.ndarray, segment_ids: np.ndarray, non_ground: np.ndar
         start = time.time()
         extension = os.path.splitext(save_path)[1]
         try:
-            if extension not in ['.laz', '.las']:
-                raise ValueError(f'The input file format {extension} is not supported.\nThe file format should be [.las/.laz].')
+            if extension not in [".laz", ".las"]:
+                raise ValueError(
+                    f"The input file format {extension} is not supported.\nThe file format should be [.las/.laz]."
+                )
         except ValueError as error:
             message = str(error)
-            lines = message.split('\n')
+            lines = message.split("\n")
             print(lines[-2])
             print(lines[-1])
             exit()
 
-        print(f'Writing the segmented point cloud to {save_path}...')
+        print(f"Writing the segmented point cloud to {save_path}...")
 
         header = laspy.LasHeader(point_format=3, version="1.3")
         lidar = laspy.LasData(header=header)
@@ -414,15 +504,19 @@ def write(self, points: np.ndarray, segment_ids: np.ndarray, non_ground: np.ndar
             indices = np.concatenate((non_ground, ground))
             lidar.xyz = points[indices]
             segment_ids = np.append(segment_ids, np.full(len(ground), -1))
-            lidar.add_extra_dim(laspy.ExtraBytesParams(name="segment_id", type=np.int32))
+            lidar.add_extra_dim(
+                laspy.ExtraBytesParams(name="segment_id", type=np.int32)
+            )
            lidar.segment_id = segment_ids
         else:
             lidar.xyz = points
-            lidar.add_extra_dim(laspy.ExtraBytesParams(name="segment_id", type=np.int32))
+            lidar.add_extra_dim(
+                laspy.ExtraBytesParams(name="segment_id", type=np.int32)
+            )
             lidar.segment_id = segment_ids
 
         lidar.write(save_path)
 
         end = time.time()
-        print(f'Writing is completed in {end - start:.2f} seconds.\n')
+        print(f"Writing is completed in {end - start:.2f} seconds.\n")
         return None
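-- 
Usage sketch (a minimal, illustrative end-to-end run of the new .pcd input
path; 'sam_vit_h_4b8939.pth' and 'cloud.pcd' are placeholder names, and the
dependencies from requirements.txt are assumed to be installed):

    from segment_lidar import samlidar

    model = samlidar.SamLidar(ckpt_path='sam_vit_h_4b8939.pth')
    points = model.read('cloud.pcd')  # .pcd files are parsed by segment_lidar.pcd.read_pcd
    cloud, non_ground, ground = model.csf(points)
    segment_ids, segmented_image, image_rgb = model.segment(points=cloud)
    model.write(points=points, segment_ids=segment_ids, non_ground=non_ground, ground=ground, save_path='segmented.las')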