Commit 6809573d authored by Vincent POLLET's avatar Vincent POLLET
Browse files

Merge branch 'calibration' into 'master'

Calibration

See merge request !19
parents 950ae227 6d75d498
Pipeline #52539 passed with stages
in 16 minutes and 42 seconds
This diff is collapsed.
......@@ -20,8 +20,7 @@ def load_camera_config(filepath, camera_name=None):
R = np.array(camera_config['relative_rotation'])
T = np.array(camera_config['relative_translation'])
markers = np.array(camera_config['markers'])
# TODO
assert camera_config['reference'] == 'nir_left'
camera_list[name] = Camera(camera_matrix, dist_coeffs, R, T, markers)
if camera_name is not None:
return camera_list[camera_name]
......
{
"capture_directory" : "calib_trim/",
"stream_config" : "config/minibatl_data_config.json",
"pattern_type": "charuco",
"pattern_rows": 10,
"pattern_columns": 7,
"square_size" : 21,
"marker_size" : 10.5,
"threshold" : 151,
"closing_size" : 2,
"intrinsics":
{
"fix_center_point" : false,
"fix_aspect_ratio" : false,
"fix_focal_length" : false,
"zero_tang_dist" : false,
"use_intrinsic_guess" : false,
"intrinsics_guess" :
{
"f" : [1200, 1200],
"center_point" : [540.5, 768],
"dist_coefs" : [0 ,0, 0, 0, 0]
}
},
"frames_list": {
"trim_20210415_1314_01_00_0000_000_82260b97.h5" : [0],
"trim_20210415_1314_01_00_0000_000_dc6faa1b.h5" : [0],
"trim_20210415_1315_01_00_0000_000_0addc956.h5" : [0],
"trim_20210415_1315_01_00_0000_000_302b13b1.h5" : [0],
"trim_20210415_1315_01_00_0000_000_3784a139.h5" : [0],
"trim_20210415_1315_01_00_0000_000_d5dbd9cb.h5" : [0],
"trim_20210415_1315_01_00_0000_000_f9ce20b1.h5" : [0],
"trim_20210415_1316_01_00_0000_000_03789b4e.h5" : [0],
"trim_20210415_1316_01_00_0000_000_34ca89c7.h5" : [0],
"trim_20210415_1316_01_00_0000_000_988146c4.h5" : [0],
"trim_20210415_1316_01_00_0000_000_c0f9ad97.h5" : [0],
"trim_20210415_1317_01_00_0000_000_7d7f48b4.h5" : [0],
"trim_20210415_1319_01_00_0000_000_1a210fcd.h5" : [0],
"trim_20210415_1319_01_00_0000_000_7151cc94.h5" : [0],
"trim_20210415_1319_01_00_0000_000_99befa27.h5" : [0],
"trim_20210415_1319_01_00_0000_000_b761454c.h5" : [0],
"trim_20210415_1319_01_00_0000_000_cfc54d8c.h5" : [0],
"trim_20210415_1320_01_00_0000_000_3bea5d18.h5" : [0],
"trim_20210415_1320_01_00_0000_000_c38fb319.h5" : [0],
"trim_20210415_1320_01_00_0000_000_dc86ec41.h5" : [0],
"trim_20210415_1320_01_00_0000_000_fa08f076.h5" : [0]
},
"display":
{
"axe_x" : [-25, 25],
"axe_y" : [-25, 25],
"axe_z" : [-10, 10]
}
}
{
"capture_directory" : "data/charuco",
"stream_config" : "data/minibatl_stream_config.json",
"pattern_type": "charuco",
"pattern_rows": 10,
"pattern_columns": 7,
"square_size" : 21,
"marker_size" : 10.5,
"threshold" : 151,
"closing_size" : 2,
"intrinsics":
{
"fix_center_point" : false,
"fix_aspect_ratio" : false,
"fix_focal_length" : false,
"zero_tang_dist" : false,
"use_intrinsic_guess" : false,
"intrinsics_guess" :
{
"f" : [1200, 1200],
"center_point" : [540.5, 768],
"dist_coefs" : [0 ,0, 0, 0, 0]
}
},
"frames_list": {
"20210304_1648_01_00_0000_000_1a698ad0.h5" : [0],
"20210304_1648_01_00_0000_000_b3de43c5.h5" : [1],
"20210304_1649_01_00_0000_000_fb19a242.h5" : [0],
"20210304_1650_01_00_0000_000_ac061c0d.h5" : [0],
"20210304_1651_01_00_0000_000_3195159e.h5" : [1],
"20210304_1651_01_00_0000_000_574ef391.h5" : [0],
"20210304_1651_01_00_0000_000_5e350835.h5" : [0],
"20210304_1651_01_00_0000_000_b43908a7.h5" : [1],
"20210304_1652_01_00_0000_000_2f2a754e.h5" : [0],
"20210304_1652_01_00_0000_000_55f1f12e.h5" : [0],
"20210304_1652_01_00_0000_000_ee569664.h5" : [0],
"20210304_1652_01_00_0000_000_ef5d1b9d.h5" : [0],
"20210304_1652_01_00_0000_000_fee5eb32.h5" : [0],
"20210304_1652_01_00_0000_000_ff616aa3.h5" : [0],
"20210304_1653_01_00_0000_000_097ca9af.h5" : [0],
"20210304_1653_01_00_0000_000_1cb4c2e1.h5" : [0],
"20210304_1653_01_00_0000_000_4207aa48.h5" : [0],
"20210304_1653_01_00_0000_000_a8948c6f.h5" : [0],
"20210304_1653_01_00_0000_000_c02e96da.h5" : [0],
"20210304_1653_01_00_0000_000_cc4656ec.h5" : [0],
"20210304_1654_01_00_0000_000_1c7a229c.h5" : [0],
"20210304_1654_01_00_0000_000_20c63083.h5" : [0]
},
"display":
{
"references" : ["color", "nir_left", "nir_right"],
"axe_x" : [-25, 25],
"axe_y" : [-25, 25],
"axe_z" : [-10, 10]
}
}
{
"capture_directory" : "data/chessboard",
"stream_config" : "data/minibatl_stream_config.json",
"pattern_type": "chessboard",
"pattern_rows": 10,
"pattern_columns": 7,
"square_size" : 21,
"threshold" : 181,
"closing_size" : 10,
"intrinsics":
{
"fix_center_point" : false,
"fix_aspect_ratio" : false,
"fix_focal_length" : false,
"zero_tang_dist" : false,
"use_intrinsic_guess" : false,
"intrinsics_guess" :
{
"f" : [1200, 1200],
"center_point" : [540.5, 768],
"dist_coefs" : [0 ,0, 0, 0, 0]
}
},
"frames_list": {
"20210304_1656_01_00_0000_000_6610f83c.h5": [1, 3],
"20210304_1659_01_00_0000_000_b6dcb21b.h5": [0, 2],
"20210304_1657_01_00_0000_000_3a4a4a7c.h5": [0, 2],
"20210304_1657_01_00_0000_000_3378aeb0.h5": [1, 3],
"20210304_1657_01_00_0000_000_b92d2326.h5": [1, 3],
"20210304_1658_01_00_0000_000_b4c6e297.h5": [0, 2],
"20210304_1656_01_00_0000_000_3366da52.h5": [1, 3],
"20210304_1659_01_00_0000_000_2e648018.h5": [1, 3],
"20210304_1658_01_00_0000_000_ebbe4e79.h5": [0, 2],
"20210304_1656_01_00_0000_000_014df636.h5": [1, 3],
"20210304_1659_01_00_0000_000_d2bd00f7.h5": [0, 2],
"20210304_1658_01_00_0000_000_44c0e392.h5": [1, 3],
"20210304_1656_01_00_0000_000_e33e1b6e.h5": [0, 2]
},
"display":
{
"references" : ["color", "nir_left", "nir_right"],
"axe_x" : [-25, 25],
"axe_y" : [-25, 25],
"axe_z" : [-10, 10]
}
}
{
"stream_config" : "/path/to/streams_config.json",
"pattern_rows": 9,
"pattern_columns": 6,
"frame" : 0,
"streams": {
"camera1_rgb": null,
"camera2_thermal":
{
"threshold" : 155,
"closing" : 4,
"invert" : true
},
"camera3_nir":
{
"threshold" : 105,
"closing" : 2,
"invert" : false
}
},
"duplicate_calibration": {
"camera3_nir": "camera3_depth"
}
}
......@@ -286,7 +286,7 @@ def main():
im_left = bob.io.image.load(args.left_image)
im_right = bob.io.image.load(args.right_image)
im_out = stereo_match(im_left, img_right, CameraPair())
im_out = stereo_match(im_left, im_right, CameraPair())
# rescale and write file
......
......@@ -153,6 +153,8 @@ class StreamWarp(bob.io.stream.StreamFilter):
:obj:`numpy.ndarray`
`data` warped to `warp_to` image shape.
"""
if self.warp_to.camera.markers is None or self.camera.markers is None:
raise ValueError("Camera markers are not defined ! Did you run linear calibration of your cameras ?")
self.markers = (self.warp_to.camera.markers, self.camera.markers) # camera added to Stream by add_stream_camera
self.warp_transform = transform.ProjectiveTransform()
self.warp_transform.estimate(*self.markers)
......
#!/usr/bin/env python
#
# Copyright (c) 2021 Idiap Research Institute, https://www.idiap.ch/
#
"""Calibration script to find markers in each stream image and output a calibration usefull for linear transformation (warp).
This scripts performs the following steps:
1 Load the configuration JSON for the calibration
2 Load capture file (hdf5) with the captures of a chessboard for each camera.
3 Find the chessboards corners and keep the 4 internal corners as markers coordinates.
If this fails, the capture is displayed to the user, which should click on the corners with the mouse.
The mouse coordinates are recorded for the markers coordinates (to skip a stream, simply close the window).
4 (Optional) Load a previous calibration from disk and simply updates the markers coordinates.
5 Write the output calibration as a JSON file.
"""
import argparse
import copy
import json
import numpy as np
import cv2
from bob.io.stream import Stream, StreamFile
from bob.io.image import bob_to_opencvbgr
from .calibration import detect_chessboard_corners, preprocess_image
pixels_xy = []
def point_picking_event_cb(event, x, y, flags, params):
"""
This function used as a callback when a mouse left button event is activated
by clicking on the opencv image. It saves the pixel selected in a global
list 'pixels_xy'.
"""
global pixels_xy
if event == cv2.EVENT_LBUTTONDOWN:
pixels_xy.append([x, y])
print("Point selected for marker {}: {}".format(len(pixels_xy), pixels_xy))
def warp_calibrate(h5_file, config, verbosity):
"""
This function load the streams saved in datasets of an hdf5 file, with images
of the chessboard targets. It select the frame defined, convert from bob to
grayscale <H,W>, process if necessary and try to detect a target. The four side
corners are saved as markers to be later used for linear warp calibration in
bob.ip.stereo.
If the target is not detected, an interactive figure is opened, where the user
should click on the 4 corners (markers), in this order:
1. down-left
2. up-left
3. down-right
4. up-right
Parameters
----------
h5_file : :py:class:`str`
Hdf5 file path with captured image of the target in datasets.
config : :py:class:`dict`
A configuration to read the streams in the hdf5 file and run the target
detection.
Returns
-------
:py:class:`dict`
A dictionnary with detected markers for each streams.
"""
# The detection findes the inner points of the chessboard pattern, which
# are 1 less than the actual number of squares in each direction
rows = config["pattern_rows"] - 1
columns = config["pattern_columns"] - 1
pattern_size = (rows, columns)
frame = config["frame"]
with StreamFile(h5_file, config["stream_config"]) as f:
calibration = {}
for stream_name, params in config["streams"].items():
# Load Stream, select frame, convert from bob to opencv grayscale
# and squeeze to 1
stream = Stream(stream_name, f)
image = stream.normalize()[frame]
if image.shape[0] == 3:
image = cv2.cvtColor(bob_to_opencvbgr(image), cv2.COLOR_BGR2GRAY)
image = image.squeeze()
if params is not None:
if params["invert"]:
image = np.ones_like(image) * 255 - image
prep_image = preprocess_image(image, params["closing"], params["threshold"])
else:
prep_image = None
ret, ptrn_pts = detect_chessboard_corners(image, prep_image, pattern_size, verbosity)
if ret:
print("Stream : {} : Detected chessboard corners.".format(stream.name))
markers = np.stack(
[
ptrn_pts[0][0],
ptrn_pts[rows - 1][0],
ptrn_pts[rows * (columns - 1)][0],
ptrn_pts[rows * columns - 1][0],
]
)
else:
print("Stream : {} : Failed to detect chessboard corners.".format(stream.name))
window_name = "image_{}".format(stream.name)
cv2.imshow(window_name, image)
global pixels_xy
pixels_xy = []
cv2.setMouseCallback(window_name, point_picking_event_cb)
while True:
key = cv2.waitKey(1)
if len(pixels_xy) == 4:
print(
"Stream : {} : Finished to collect manually\
the corners.".format(
stream.name
)
)
cv2.destroyAllWindows()
markers = np.stack(copy.copy(pixels_xy))
break
if key == 27 or cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
print(
"Stream : {} : Failed to collect manually\
the corners.".format(
stream.name
)
)
cv2.destroyAllWindows()
markers = None
break
if markers is not None:
# As markers are ndarray, tolist() is used to serialized the values
calibration[stream.name] = {"markers": markers.tolist()}
return calibration
def fill_calibration(calibration, reference):
"""Add more elements (keys) to the calibration dictionnary to make it work with bob.ip.stereo standard
bob.ip.stereo expects some keys to be present in a calibration JSON files, however when using only warp transform
most of them are not required. This function simply adds them with values None. If the keys are already present, do
not change them ; if they are missing, set them to None.
Parameters
----------
calibration : :py:class:`dict`
A dictionary with streams/warp-markers as keys/values.
reference : :py:class:`str`
reference stream.
Returns
-------
:py:class:`dict`
The filled calibration.
"""
# Make sure that all the keys used in bob.ip.stereo.Camera.load_camera_config()
# are defined.
standard_calibration_keys = [
"camera_matrix",
"distortion_coefs",
"markers",
"relative_rotation",
"relative_translation",
]
# load calibration config
fill_calibration = copy.copy(calibration)
for camera, camera_config in calibration.items():
for key in standard_calibration_keys:
if key not in camera_config.keys():
# Projection markers as the 4 external corners
fill_calibration[camera][key] = None
if "reference" not in camera_config.keys():
fill_calibration[camera]["reference"] = reference
return fill_calibration
def update_calibration(markers_calibration, bobipstereo_calibration):
"""Updates the markers in bobipstereo_calibration with values from markers_calibration
Parameters
----------
markers_calibration : dict
Calibration dictionary containing only markers values per camera
bobipstereo_calibration : dict
Calibration dictionnary with all keys needed for bob.ip.stereo usage.
"""
if set(bobipstereo_calibration.keys()) != set(bobipstereo_calibration.keys()):
raise ValueError("Stream names in warp calibration config and loaded config to update do not match!")
for camera in bobipstereo_calibration.keys():
if camera in markers_calibration.keys():
bobipstereo_calibration[camera]["markers"] = markers_calibration[camera]["markers"]
return bobipstereo_calibration
def parse_arguments():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"-c",
"--config",
type=str,
help="An absolute path to the JSON file containing the configuration \
to run the warp calibration.",
)
parser.add_argument("-r", "--reference", type=str, default=None, help="Reference stream.")
parser.add_argument("-i", "--h5-file", type=str, default=None, help="Input hdf5 file.")
parser.add_argument("-o", "--output-file", type=str, default=None, help="Output calibration JSON file.")
parser.add_argument(
"-u",
"--update_calibration",
type=str,
default=None,
help="This calibration will used for the output values, except for the markers position which are updated by the calibration.",
)
parser.add_argument(
"-v",
"--verbosity",
action="count",
default=0,
help="Output verbosity: -v output calibration result, -vv output the dataframe, \
-vvv plots the target detection.",
)
return parser.parse_args()
def main():
args = parse_arguments()
with open(args.config) as f:
config = json.load(f)
calibration = warp_calibrate(args.h5_file, config, args.verbosity)
# Duplicate warp calibration for specified pair of stream with the same sensor, often the case
# in cameras with a depth stream created from infrared (NIR) sensor(s).
if "duplicate_calibration" in config.keys():
for stream, duplicate_stream in config["duplicate_calibration"].items():
calibration[duplicate_stream] = calibration[stream]
# If there is a configuration to update: we load it and update the markers
# fields of the cameras
if args.update_calibration is not None:
with open(args.update_calibration) as calib_f:
to_update_calib = json.load(calib_f)
calibration = update_calibration(calibration, to_update_calib)
# If not we still add some keys to the calibration dictionnary to comply
# with what bob.ip.stereo expects.
else:
calibration = fill_calibration(calibration, args.reference)
print(calibration)
if args.output_file is not None:
with open(args.output_file, "w") as f:
json.dump(calibration, f, indent=4, sort_keys=True)
print("{} written.".format(args.output_file))
if __name__ == "__main__":
main()
......@@ -33,6 +33,8 @@ requirements:
- pybind11 {{ pybind11 }}
- opencv {{ opencv }}
- scikit-image {{ scikit_image }}
- pandas {{ pandas }}
- matplotlib {{ matplotlib }}
- bob.io.image
- bob.ip.color
- bob.io.stream
......@@ -41,6 +43,8 @@ requirements:
- {{ pin_compatible('setuptools') }}
- {{ pin_compatible('numpy') }}
- {{ pin_compatible('opencv') }}
- {{ pin_compatible('pandas') }}
- {{ pin_compatible('matplotlib') }}
test:
imports:
......
.. _bob.ip.stereo.calibration:
------------------
Camera Calibration
------------------
Camera calibration consists in estimating the parameters of a camera, using a set of images of a pattern from the
camera. The calibration procedure implemented in ``calibration.py`` uses capture files containing synchronous images of
several cameras to first estimate the intrinsic, then the extrinsic, parameters of the cameras. The script mostly wraps
opencv's algorithms to use with ``bob.io.stream`` data files, therefore it is highly recommended to read opencv's
documentation before using this script.
Camera Parameters
-----------------
The intrinsic parameters of a camera are the parameters that do not depend on the camera's environment. They are composed
of the camera matrix - which describes the transformation between a 3D point in the world coordinate system, to a 2D
pixel in an image plane, assuming a distortion free projection - and the distortion coefficients of a camera. Opencv's
camera and distortion model is explained `here
<https://docs.opencv.org/4.5.0/d9/d0c/group__calib3d.html#details>`_.
The extrinsic parameters are the rotation and translation of the camera representing the change of referential from the
world coordinate to the camera coordinate system. Usually, the position and orientation of one camera, the reference, is
used as the world coordinate system. In that case, the extrinsic parameters of the other cameras encode the relative
position and rotation of these camera with respect to the reference, which allows to use stereo processing.
Intrinsic Parameters Estimation
-------------------------------
To estimate the intrinsic parameters of a camera, an image of a pattern is captured. The pattern contains points whose
positions and distances with respect to each other are known. The first type of patterns to be used were the checkerboard
patterns: the points of interest of the patterns are the inner corners of the checkerboard. The points are aligned, and
the distance between the points is given by the length of a checkerboard square. However, in an image of the pattern,
the checkerboard corners are not aligned due to distortion, which allows to estimate the distortion coefficients
of the camera's lens. The number of pixels between corners maps to the distance in the real world (square length) and
allows to estimate the camera matrix.
Opencv implements methods to detect checkerboard corners, which give precise measurements of the corners in the
image, as is required for intrinsic parameters estimation. However these methods are not robust to occlusion: if a part
of the pattern is hidden, the detection fails. To circumvent this problem, a Charuco pattern can be used: the Charuco
pattern has unique (for a given pattern size) Aruco symbols placed in its white squares. The symbols allow to
individually identify each corner, so the detection can work even when not all corners are visible.
See opencv's documentation for more explanations on Charuco patterns and how to generate them:
`Detection of ArUco Markers
<https://docs.opencv.org/3.4/df/d4a/tutorial_charuco_detection.html>`_,
`Detection of ChArUco Corners
<https://docs.opencv.org/3.4/df/d4a/tutorial_charuco_detection.html>`_,
`Calibration with ArUco and ChArUco
<https://docs.opencv.org/4.5.2/da/d13/tutorial_aruco_calibration.html>`_.
Extrinsic Parameters Estimation
-------------------------------
Once the intrinsic parameters of a cameras are known, we are able to "rectify" the camera's image, that is to correct
the distortion and align the images. When this is done, we can tackle the problem of finding the
position of the camera with respect to the real world coordinate system. Indeed, the mapping between a 3D point in the
real world and the corresponding 2D point in the camera's pixels is the projection parametrized by the camera's pose.
Given enough pairs of (3D, 2D) points, the projection parameters can be estimated: this is the so-called
Perspective-n-Point (PnP) problem.
`Opencv's PnP solver documentation
<https://docs.opencv.org/4.5.0/d9/d0c/group__calib3d.html#ga549c2075fac14829ff4a58bc931c033d>`_.
.. note::
The PnP algorithm works with a set of points which coordinates are known in the coordinate system of the camera's
images (pixel position in the image) and in the world coordinate system. The world coordinate system origin and
orientation can be arbitrarily chosen, the result of the PnP will be the camera pose with respect to that choice. The
simplest choice is to place the coordinate system along the checkerboard axis: that way the 3D coordinates of the
corners are (``square_number_along_X`` * ``square_length``, ``square_number_along_Y`` * ``square_length``, 0).
A minimum of 4 points is needed for the PnP algorithm, however more points helps improve the precision and
robustness to outliers.
However, we are not so much interested in knowing one camera's pose, as in knowing the position of one camera with
respect to another. In opencv, this is performed by the `stereo calibrate
<https://docs.opencv.org/4.5.0/d9/d0c/group__calib3d.html#ga91018d80e2a93ade37539f01e6f07de5>`_ function, which
performs the following steps:
* Receive a list of images for each camera, of the same pattern positions (for instance, the images were taken
simultaneously with both cameras looking at the pattern).
* For each pair of image from camera 1, camera 2, solve the PnP problem for each camera. Then from the pose of the 2
camera with respect to the pattern, compute the pose of one camera with respect to the other.
* Average the poses obtained for each image pair in the previous step.
* Fine tune the pose by optimizing the reprojection error of all points in both cameras. (Levenberg–Marquardt optimizer)