From f1cce7c4157bc3e4acdd61c1c52d2c07a25d7a5e Mon Sep 17 00:00:00 2001 From: Sam Hecht Date: Tue, 6 Sep 2016 17:48:36 -0700 Subject: [PATCH] Fixing links & Security tutorials (#128) * fixing broken links * adding alex's changes and notes on extending to add connectors * adding more language about extending images --- docs/best-practices.rst | 16 -- docs/connect-avro-jdbc.rst | 254 ------------------ docs/contributing.rst | 12 +- docs/development.rst | 25 +- docs/index.rst | 4 +- docs/intro.rst | 18 +- docs/operations/external-volumes.rst | 2 +- docs/operations/monitoring.rst | 2 +- docs/operations/operations.rst | 2 +- docs/quickstart.rst | 16 +- docs/tutorials/clustered-deployment-sasl.rst | 125 ++++----- docs/tutorials/clustered-deployment-ssl.rst | 22 +- docs/tutorials/clustered-deployment.rst | 4 +- docs/tutorials/connect-avro-jdbc.rst | 22 +- docs/tutorials/tutorials.rst | 2 +- .../kafka-cluster-sasl/docker-compose.yml | 14 +- 16 files changed, 136 insertions(+), 404 deletions(-) delete mode 100644 docs/best-practices.rst delete mode 100644 docs/connect-avro-jdbc.rst diff --git a/docs/best-practices.rst b/docs/best-practices.rst deleted file mode 100644 index afe9c5e3..00000000 --- a/docs/best-practices.rst +++ /dev/null @@ -1,16 +0,0 @@ - -1. Use different volumes for transaction logs and data for Zookeeper. - -2. Use host networking for standalone deployments. - -3. Run with restart=always - -4. Use external volumes for data - - - -Caveats: - -- Not tested on overlay networks -- Snappy + Zulu slowness. -- SASL + Bridged n/w don't work with ZK diff --git a/docs/connect-avro-jdbc.rst b/docs/connect-avro-jdbc.rst deleted file mode 100644 index f03b50ae..00000000 --- a/docs/connect-avro-jdbc.rst +++ /dev/null @@ -1,254 +0,0 @@ -Now, lets extend this example to use Avro as the data format and use a JDBC Source to read from a MySQL database. For this example, make sure that the Schema registry is running. - -1. Setup - - 1. 
Kafka Connect stores config, status and offsets of the connectors in Kafka topics. We will create these topics now. - - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-kafka:3.0.0 \ - kafka-topics --create --topic quickstart-avro-offsets --partitions 1 --replication-factor 1 --if-not-exists --zookeeper localhost:32181 - - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-kafka:3.0.0 \ - kafka-topics --create --topic quickstart-avro-config --partitions 1 --replication-factor 1 --if-not-exists --zookeeper localhost:32181 - - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-kafka:3.0.0 \ - kafka-topics --create --topic quickstart-avro-status --partitions 1 --replication-factor 1 --if-not-exists --zookeeper localhost:32181 - - 2. Next we will create a topic for storing data for our quickstart. - - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-kafka:3.0.0 \ - kafka-topics --create --topic quickstart-avro-data --partitions 1 --replication-factor 1 --if-not-exists --zookeeper localhost:32181 - - - 3. Verify that the topics are created. - - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-kafka:3.0.0 \ - kafka-topics --describe --zookeeper localhost:32181 - - - 4. Download the MySQL JDBC driver and copy it to the ``jars`` folder (You will need to SSH into the VM to run these commands if you are running Docker Machine. You might have to run the command as root). - - :: - - mkdir -p /tmp/quickstart/jars - - curl -k -SL "https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.39.tar.gz" | tar -xzf - -C /tmp/quickstart/jars --strip-components=1 mysql-connector-java-5.1.39/mysql-connector-java-5.1.39-bin.jar - - -2. Start a connect worker with Avro support. 
- - :: - - docker run -d \ - --name=kafka-connect-avro \ - --net=host \ - -e CONNECT_BOOTSTRAP_SERVERS=localhost:29092 \ - -e CONNECT_REST_PORT=28083 \ - -e CONNECT_GROUP_ID="quickstart-avro" \ - -e CONNECT_CONFIG_STORAGE_TOPIC="quickstart-avro-config" \ - -e CONNECT_OFFSET_STORAGE_TOPIC="quickstart-avro-offsets" \ - -e CONNECT_STATUS_STORAGE_TOPIC="quickstart-avro-status" \ - -e CONNECT_KEY_CONVERTER="io.confluent.connect.avro.AvroConverter" \ - -e CONNECT_VALUE_CONVERTER="io.confluent.connect.avro.AvroConverter" \ - -e CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL="http://localhost:8081" \ - -e CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL="http://localhost:8081" \ - -e CONNECT_INTERNAL_KEY_CONVERTER="org.apache.kafka.connect.json.JsonConverter" \ - -e CONNECT_INTERNAL_VALUE_CONVERTER="org.apache.kafka.connect.json.JsonConverter" \ - -e CONNECT_REST_ADVERTISED_HOST_NAME="localhost" \ - -e CONNECT_LOG4J_ROOT_LOGLEVEL=DEBUG \ - -v /tmp/quickstart/file:/tmp/quickstart \ - -v /tmp/quickstart/jars:/etc/kafka-connect/jars \ - confluentinc/cp-kafka-connect:latest - -3. Make sure that the connect worker is healthy. - - :: - - docker logs kafka-connect-avro | grep started - - You should see the following - - :: - - [2016-08-25 19:18:38,517] INFO Kafka Connect started (org.apache.kafka.connect.runtime.Connect) - [2016-08-25 19:18:38,557] INFO Herder started (org.apache.kafka.connect.runtime.distributed.DistributedHerder) - -4. Launch a MYSQL database - - 1. Launch database container - :: - - docker run -d \ - --name=quickstart-mysql \ - --net=host \ - -e MYSQL_ROOT_PASSWORD=confluent \ - -e MYSQL_USER=confluent \ - -e MYSQL_PASSWORD=confluent \ - -e MYSQL_DATABASE=connect_test \ - mysql - - 2. Create databases and tables. 
- Exec into the docker container to create the databases - :: - docker exec -it quickstart-mysql bash - - On the bash prompt, create a MySQL shell - - :: - - mysql -u confluent -pconfluent - - Execute the following SQL statements - - :: - - CREATE DATABASE IF NOT EXISTS connect_test; - USE connect_test; - - DROP TABLE IF EXISTS test; - - - CREATE TABLE IF NOT EXISTS test ( - id serial NOT NULL PRIMARY KEY, - name varchar(100), - email varchar(200), - department varchar(200), - modified timestamp default CURRENT_TIMESTAMP NOT NULL, - INDEX `modified_index` (`modified`) - ); - - INSERT INTO test (name, email, department) VALUES ('alice', 'alice@abc.com', 'engineering'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - INSERT INTO test (name, email, department) VALUES ('bob', 'bob@abc.com', 'sales'); - exit; - - Exit the container shell by typing ``exit``. - - 3. We will now create our JDBC Source connector using the Connect REST API. (Make sure you have curl installed.) - - Set the CONNECT_HOSTNAME.If you are running this on Docker Machine, then the hostname will be ``docker-machine ip `` - :: - - export CONNECT_HOST=localhost - - Create the JDBC Source connector. 
- :: - - curl -X POST \ - -H "Content-Type: application/json" \ - --data '{ "name": "quickstart-jdbc-source-foo", "config": { "connector.class": "io.confluent.connect.jdbc.JdbcSourceConnector", "tasks.max": 1, "connection.url": "jdbc:mysql://127.0.0.1:3306/connect_test?user=root&password=confluent", "mode": "incrementing", "incrementing.column.name": "id", "timestamp.column.name": "modified", "topic.prefix": "quickstart-jdbc-foo", "poll.interval.ms": 1000 } }' \ - http://$CONNECT_HOST:28082/connectors - - The output of this command should be - :: - - {"name":"quickstart-jdbc-source","config":{"connector.class":"io.confluent.connect.jdbc.JdbcSourceConnector","tasks.max":"1","connection.url":"jdbc:mysql://127.0.0.1:3306/connect_test?user=root&password=confluent","mode":"incrementing","incrementing.column.name":"id","timestamp.column.name":"modified","topic.prefix":"quickstart-jdbc-","poll.interval.ms":"1000","name":"quickstart-jdbc-source"},"tasks":[]} - - Check the status of the connector using curl as follows: - - :: - - curl -s -X GET http://$CONNECT_HOST:28083/connectors/quickstart-jdbc-source/status - - You should see - - :: - - {"name":"quickstart-jdbc-source","connector":{"state":"RUNNING","worker_id":"localhost:28083"},"tasks":[{"state":"RUNNING","id":0,"worker_id":"localhost:28083"}]} - - The JDBC sink create intermediate topics for storing data. We should see a ``quickstart-jdbc-test`` topic. - - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-kafka:3.0.0 \ - kafka-topics --describe --zookeeper localhost:32181 - - - Now we will read from the ``quickstart-jdbc-test`` topic to check if the connector works. 
- - :: - - docker run \ - --net=host \ - --rm \ - confluentinc/cp-schema-registry:3.0.0 \ - kafka-avro-console-consumer --bootstrap-server localhost:29092 --topic quickstart-jdbc-test --new-consumer --from-beginning --max-messages 10 - - You should see the following: - - :: - - {"id":1,"name":{"string":"alice"},"email":{"string":"alice@abc.com"},"department":{"string":"engineering"},"modified":1472153437000} - {"id":2,"name":{"string":"bob"},"email":{"string":"bob@abc.com"},"department":{"string":"sales"},"modified":1472153437000} - .... - {"id":10,"name":{"string":"bob"},"email":{"string":"bob@abc.com"},"department":{"string":"sales"},"modified":1472153439000} - Processed a total of 10 messages - - 5. We will now launch a File Sink to read from this topic and write to an output file. - - :: - - curl -X POST -H "Content-Type: application/json" \ - --data '{"name": "quickstart-avro-file-sink", "config": {"connector.class":"org.apache.kafka.connect.file.FileStreamSinkConnector", "tasks.max":"1", "topics":"quickstart-jdbc-test", "file": "/tmp/quickstart/jdbc-output.txt"}}' \ - http://$CONNECT_HOST:28083/connectors - - You should see the following in the output. - :: - - {"name":"quickstart-avro-file-sink","config":{"connector.class":"org.apache.kafka.connect.file.FileStreamSinkConnector","tasks.max":"1","topics":"quickstart-jdbc-test","file":"/tmp/quickstart/jdbc-output.txt","name":"quickstart-avro-file-sink"},"tasks":[]} - - Check the status of the connector. - - :: - - curl -s -X GET http://$CONNECT_HOST:28083/connectors/quickstart-avro-file-sink/status - - You should see - - :: - - {"name":"quickstart-avro-file-sink","connector":{"state":"RUNNING","worker_id":"localhost:28083"},"tasks":[{"state":"RUNNING","id":0,"worker_id":"localhost:28083"}]} - - Now check the file to see if the data is present. You will need to SSH into the VM if you are running Docker Machine. - - :: - - cat /tmp/quickstart/file/jdbc-output.txt | wc -l - - You should see ``10`` as the output. 
- - Because of https://issues.apache.org/jira/browse/KAFKA-4070, you will not see the actual data in the file. diff --git a/docs/contributing.rst b/docs/contributing.rst index 91b836ee..ce1c3cca 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -8,19 +8,15 @@ General Guidelines When submitting a pull request (PR), please use the following guidelines: -* Make sure your code respects existing formatting conventions. In general, follow - the same coding style as the code that you are modifying. +* Make sure your code respects existing formatting conventions. In general, follow the same coding style as the code that you are modifying. * Do add/update documentation appropriately for the change you are making. -* If you are introducing a new feature you may want to first submit your idea - for feedback to the `Confluent mailing list `_. +* If you are introducing a new feature you may want to first submit your idea for feedback to the `Confluent mailing list `_. * Non-trivial features should include unit tests covering the new functionality. * Bugfixes should include a unit test or integration test reproducing the issue. -* Try to keep pull requests short and submit separate ones for unrelated - features, but feel free to combine simple bugfixes/tests into one pull request. +* Try to keep pull requests short and submit separate ones for unrelated features, but feel free to combine simple bugfixes/tests into one pull request. * Keep the number of commits small and combine commits for related changes. * Each commit should compile on its own and ideally pass tests. -* Keep formatting changes in separate commits to make code reviews easier and - distinguish them from actual code changes. +* Keep formatting changes in separate commits to make code reviews easier and distinguish them from actual code changes. 
GitHub Workflow ~~~~~~~~~~~~~~ diff --git a/docs/development.rst b/docs/development.rst index 834ad654..7e873a56 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -120,9 +120,7 @@ To get started, you can build all the CP images as follows: make build-debian -You can run build tests by running ``make test-build``. Use this when -you want to test the builds with a clean slate. This deletes all images -and starts from scratch. +You can run build tests by running ``make test-build``. Use this when you want to test the builds with a clean slate. This deletes all images and starts from scratch. .. _running_tests : @@ -151,7 +149,7 @@ To run a single test, you can do so with Python. In the following example, we r Make Targets ~~~~~~~~~~~~ -Deletes all images tagged with ``label=io.confluent.docker.testing=true`` : +Delete all images tagged with ``label=io.confluent.docker.testing=true`` : ``clean-images`` @@ -184,10 +182,8 @@ config management, use service discovery etc. This page provides instructions f Prerequisites ~~~~~~~~~~~~ -1. Read the section on :ref:`development ` to setup the development - environment to build docker images. -2. Understand how the images are structured by reading the following - docs: +1. Read the section on :ref:`development ` to set up the development environment to build docker images. +2. Understand how the images are structured by reading the following docs: - ``image-structure`` describes the structure of the images - ``utility_scripts`` describes the utility scripts used in the @@ -195,6 +191,14 @@ Prerequisites 3. If you plan to contribute back to the project, please be sure to review our guide on :ref:`contributing `. +Adding Connectors to the Kafka Connect Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are currently two ways to add new connectors to the Kafka Connect image. + +* Build a new Docker image that has the connector installed. You can follow example 2 in the documentation below. 
You will need to make sure that the connector jars are on the classpath. +* Add the connector jars via volumes. If you don't want to create a new Docker image, please see our documentation on `Configuring Kafka Connect with External Jars `_ to configure the ``cp-kafka-connect`` container with external jars. + .. _examples : Examples @@ -206,8 +210,6 @@ The following examples show to extend the images. This example shows how to change the configuration management. You will need to override the ``configure`` script to download the scripts from an HTTP URL. - For example: - To do this for the Zookeeper image, you will need the following dockerfile and configure script. This example assumes that each property file is has a URL. ``Dockerfile`` @@ -255,7 +257,7 @@ The following examples show to extend the images. -e ZOOKEEPER_LOG_CONFIG_URL =http://foo.com/zk1/log4j.properties \ foo/zookeeper:latest -2. Add more software +2. Add More Software This example shows how to add new software to an image. For example, you might want to extend the Kafka Connect client to include the MySQL JDBC driver. @@ -741,7 +743,6 @@ The following properties may be configured when using the ``kafka-ready`` utilit * Default: "PKIX" * Importance: low - .. _references : References diff --git a/docs/index.rst b/docs/index.rst index d4011655..b15d7ef2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,8 +3,8 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -CP Docker Images -================ +Docker +====== Contents: diff --git a/docs/intro.rst b/docs/intro.rst index 6a547ed1..2857efdc 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -19,14 +19,14 @@ The following are prerequisites for running the CP Docker images: 2. An understanding of how Docker host networks and bridge networks work (Highly Recommended) 3. Docker Version 1.11 or greater. Previous versions are not currently tested. 
-Important Caveats +Important Notes/Caveats -------------- -1. Mounted Volumes: +1. Mounted Volumes If you are using Kafka and Zookeeper, you should always `use mounted volumes `_ to persist data in the event that a container stops running or is restarted. This is important when running a system like Kafka on Docker, as it relies heavily on the filesystem for storing and caching messages. -2. Bridge Networking vs. Host Networking: +2. Bridge Networking vs. Host Networking Bridge networking is currently only supported on a single host. For multiple hosts, you will need to use overlay networks which are not currently supported. It order to expose Kafka to clients outside of the bridge network, you need to find the container IP and put it in ``advertised.listeners``. This can be difficult to achieve depending on how you're using the images. Furthermore, it can add a network hop and may not be as performant as the host network, which shares the network stack. In summary, host networking is the recommended option in the following cases: @@ -36,8 +36,15 @@ Important Caveats 3. Always launch containers with ``Restart=always`` unless you are using a process manager. 4. These images are currently tested and shipped with `Azul Zulu OpenJDK `_. If you want to switch to Oracle Java, please refer to our instructions for extending the images + +5. Adding Connectors to the Kafka Connect Image + + There are currently two ways to add new connectors to the Kafka Connect image. + + * Build a new Docker image that has the connector installed. You can follow the examples found in our documentation on `Extending Images `_. You will need to make sure that the connector jars are on the classpath. + * Add the connector jars via volumes. If you don't want to create a new Docker image, please see our documentation on `Configuring Kafka Connect with External Jars `_ to configure the ``cp-kafka-connect`` container with external jars. -5. Untested Features +6. 
Untested Features The following features/conditions are not currently tested: @@ -50,6 +57,3 @@ License ------- The Confluent Platform Docker Images are available as open source software under the Apache License v2.0 license. For more information on the licenses for each of the individual Confluent Platform components packaged in the images, please refer to the `respective Confluent Platform documentation for each component `_. - - - diff --git a/docs/operations/external-volumes.rst b/docs/operations/external-volumes.rst index b17048ff..3aafb10e 100644 --- a/docs/operations/external-volumes.rst +++ b/docs/operations/external-volumes.rst @@ -12,7 +12,7 @@ When working with Docker, you may sometimes need to persist data in the event of .. note:: - In the event that you need to add support for additional use cases for external volumes, please refer to our guide on `extending the images <_extending_images>`_. + In the event that you need to add support for additional use cases for external volumes, please refer to our guide on `extending the images <../development.html#extending-the-docker-images>`_. Data Volumes for Kafka & Zookeeper ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/operations/monitoring.rst b/docs/operations/monitoring.rst index a3d382e8..7c7041a7 100644 --- a/docs/operations/monitoring.rst +++ b/docs/operations/monitoring.rst @@ -51,7 +51,7 @@ Properties Launching Kafka and Zookeeper with JMX Enabled `````````````````````````````````````````````` -The steps for launching Kafka and Zookeeper with JMX enabled are the same as we saw in the `quickstart guide `_, with the only difference being that you set ``KAFKA_JMX_PORT`` and ``KAFKA_JMX_HOSTNAME`` for both. 
Here are examples of the Docker ``run`` commands for each service: +The steps for launching Kafka and Zookeeper with JMX enabled are the same as we saw in the `quickstart guide <../quickstart.html>`_, with the only difference being that you set ``KAFKA_JMX_PORT`` and ``KAFKA_JMX_HOSTNAME`` for both. Here are examples of the Docker ``run`` commands for each service: .. sourcecode:: bash diff --git a/docs/operations/operations.rst b/docs/operations/operations.rst index 8f76b753..892899af 100644 --- a/docs/operations/operations.rst +++ b/docs/operations/operations.rst @@ -3,7 +3,7 @@ Operations ========== -In this section, we provide a closer look at how to run a Confluent Platform cluster on Docker. If you are looking for a simple tutorial, you should refer instead to the `quickstart guide `_. +In this section, we provide a closer look at how to run a Confluent Platform cluster on Docker. If you are looking for a simple tutorial, you should refer instead to the `quickstart guide <../quickstart.html>`_. We will cover the following topics: diff --git a/docs/quickstart.rst b/docs/quickstart.rst index c2fc8fac..012e4b08 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -1,21 +1,18 @@ -.. _quickstart : +.. _docker_quickstart : Quickstart ========== In this section, we provide a simple guide for running a Kafka cluster along with all of the other Confluent Platform components. By the end of this quickstart, you will have successfully installed and run a simple deployment including each component with Docker. -In order to keep things simple, this quickstart guide is limited to a single node Confluent Platform cluster. For more advanced tutorials, you can refer to the following guides: +In order to keep things simple, this quickstart guide is limited to a single node Confluent Platform cluster. For more advanced tutorials, you can refer to the `tutorials section `_ of this documentation. 
-* Securing Your Cluster on Docker -* Running in a Clustered Environment - -It is also worth noting that we will be configuring Kafka and Zookeeper to store data locally in the Docker containers. However, you can also refer to our `bla bla bla `_ for an example of how to add mounted volumes to the host machine to persist data in the event that the container stops running. This is important when running a system like Kafka on Docker, as it relies heavily on the filesystem for storing and caching messages. +It is also worth noting that we will be configuring Kafka and Zookeeper to store data locally in the Docker containers. However, you can also refer to our `documentation on Docker external volumes `_ for an example of how to add mounted volumes to the host machine to persist data in the event that the container stops running. This is important when running a system like Kafka on Docker, as it relies heavily on the filesystem for storing and caching messages. Installing & Running Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For this tutorial, we'll run Docker using the Docker client. If you are interested in information on using Docker Compose to run the images, `we have docs for that too `_. +For this tutorial, we'll run Docker using the Docker client. If you are interested in information on using Docker Compose to run the images, :ref:`we have docs for that too `. To get started, you'll need to first `install Docker and get it running `_. The CP Docker Images require Docker version 1.11 or greater. @@ -52,7 +49,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke -e ZOOKEEPER_CLIENT_PORT=32181 \ confluentinc/cp-zookeeper:3.0.1 - In this command, we tell Docker to run the ``confluentinc/cp-zookeeper:3.0.1`` container named ``zookeeper``. We also specify that we want to use host networking and pass in the required parameter for running Zookeeper: ``ZOOKEEPER_CLIENT_PORT``. 
For a full list of the available configuration options and more details on passing environment variables into Docker containers, `go to this link that is yet to be created `_. + In this command, we tell Docker to run the ``confluentinc/cp-zookeeper:3.0.1`` container named ``zookeeper``. We also specify that we want to use host networking and pass in the required parameter for running Zookeeper: ``ZOOKEEPER_CLIENT_PORT``. For a full list of the available configuration options and more details on passing environment variables into Docker containers, `please see the configuration reference docs `_. Now that we've attempted to start Zookeeper, we'll check the logs to see the server has booted up successfully by running the following command: @@ -266,7 +263,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke [{"key":null,"value":{"f1":"value1"},"partition":0,"offset":0},{"key":null,"value":{"f1":"value2"},"partition":0,"offset":1},{"key":null,"value":{"f1":"value3"},"partition":0,"offset":2}] -7. We will walk you through an end-to-end data transfer pipeline using Kafka Connect. We'll start by reading data from a file and writing that data to a new file. We will then extend the pipeline to show how to use connect to read from a database. This example is meant to be simple for the sake of this introductory tutorial. If you'd like a more in-depth example, please refer to `our tutorial on using a JDBC connector with avro data `_. +7. We will walk you through an end-to-end data transfer pipeline using Kafka Connect. We'll start by reading data from a file and writing that data to a new file. We will then extend the pipeline to show how to use connect to read from a database. This example is meant to be simple for the sake of this introductory tutorial. If you'd like a more in-depth example, please refer to `our tutorial on using a JDBC connector with avro data `_. First, let's start up Kafka Connect. 
Connect stores config, status, and internal offsets for connectors in Kafka topics. We will create these topics now. We already have Kafka up and running from the steps above. @@ -647,6 +644,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke 9. Once you're done, cleaning up is simple. You can simply run ``docker rm -f $(docker ps -a -q)`` to delete all the containers we created in the steps above. Because we allowed Kafka and Zookeeper to store data on their respective containers, there are no additional volumes to clean up. If you also want to remove the Docker machine you used, you can do so using ``docker-machine rm ``. +.. _quickstart_compose: Getting Started with Docker Compose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/tutorials/clustered-deployment-sasl.rst b/docs/tutorials/clustered-deployment-sasl.rst index 51b42dd2..1a890696 100644 --- a/docs/tutorials/clustered-deployment-sasl.rst +++ b/docs/tutorials/clustered-deployment-sasl.rst @@ -3,7 +3,7 @@ Clustered Deployment Using SASL and SSL ---------------------------------------- -In this section, we provide a tutorial for running a secure three-node Kafka cluster and Zookeeper ensemble with SASL. By the end of this tutorial, you will have successfully installed and run a simple deployment with security enabled on Docker. If you're looking for a simpler tutorial, please `refer to our quickstart guide `_, which is limited to a single node Kafka cluster. +In this section, we provide a tutorial for running a secure three-node Kafka cluster and Zookeeper ensemble with SASL. By the end of this tutorial, you will have successfully installed and run a simple deployment with SSL and SASL security enabled on Docker. If you're looking for a simpler tutorial, please `refer to our quickstart guide <../quickstart.html>`_, which is limited to a single node Kafka cluster. .. note:: @@ -54,7 +54,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke .. 
sourcecode:: bash - cd $(pwd)/examples/kafka-cluster-ssl/secrets + cd $(pwd)/examples/kafka-cluster-sasl/secrets ./create-certs.sh (Type yes for all "Trust this certificate? [no]:" prompts.) cd - @@ -65,66 +65,66 @@ Now that we have all of the Docker dependencies installed, we can create a Docke export KAFKA_SASL_SECRETS_DIR=$(pwd)/examples/kafka-cluster-sasl/secrets -2. Build and run the kerberos image - - :: + To configure SASL, all your nodes will need to have a proper hostname. It is not advisable to use ``localhost`` as the hostname. - cd test/images/kerberos - docker build -t confluentinc/cp-kerberos:3.0.1 . + We need to create an entry in ``/etc/hosts`` with hostname ``quickstart.confluent.io`` that points to the ``eth0`` IP. On Linux, run the commands below on the Linux host. If running Docker Machine (e.g., for Mac or Windows), you will need to SSH into the VM and run the commands below as root. You can SSH into the Docker Machine VM by running ``docker-machine ssh confluent``. - docker run -d \ - --name=kerberos \ - --net=host \ - -v ${KAFKA_SASL_SECRETS_DIR}:/tmp/keytab \ - -v /dev/urandom:/dev/random \ - confluentinc/cp-kerberos:3.0.1 + .. sourcecode:: bash + + export ETH0_IP=$(ifconfig eth0 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}') + echo ${ETH0_IP} quickstart.confluent.io >> /etc/hosts -3. Create the principals and keytabs. - i. To configure SASL, all your nodes will need to have a proper hostname . It is not advisable to use ``localhost`` as the hostname. +2. Build and run the kerberos image - We will now create a entry in ``/etc/hosts`` with hostname ``quickstart.confluent.io`` that points to ``eth0`` IP. + .. sourcecode:: bash - :: + cd tests/images/kerberos + docker build -t confluentinc/cp-kerberos:3.0.1 . 
- export ETH0_IP=$(ifconfig eth0 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}') + docker run -d \ + --name=kerberos \ + --net=host \ + -v ${KAFKA_SASL_SECRETS_DIR}:/tmp/keytab \ + -v /dev/urandom:/dev/random \ + confluentinc/cp-kerberos:3.0.1 - echo ${ETH0_IP} quickstart.confluent.io >> /etc/hosts - ii. Now, lets create all the prinicipals and their keytabs on Kerberos. +3. Create the principals and keytabs. - :: + .. sourcecode:: bash - for principal in zookeeper1 zookeeper2 zookeeper3 - do - docker exec -it kerberos kadmin.local -q "addprinc -randkey zookeeper/quickstart.confluent.io@TEST.CONFLUENT.IO" - docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab zookeeper/quickstart.confluent.io@TEST.CONFLUENT.IO" - done + for principal in zookeeper1 zookeeper2 zookeeper3 + do + docker exec -it kerberos kadmin.local -q "addprinc -randkey zookeeper/quickstart.confluent.io@TEST.CONFLUENT.IO" + docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab zookeeper/quickstart.confluent.io@TEST.CONFLUENT.IO" + done - :: + .. sourcecode:: bash - for principal in zkclient1 zkclient2 zkclient3 - do - docker exec -it kerberos kadmin.local -q "addprinc -randkey zkclient/quickstart.confluent.io@TEST.CONFLUENT.IO" - docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab zkclient/quickstart.confluent.io@TEST.CONFLUENT.IO" - done + for principal in zkclient1 zkclient2 zkclient3 + do + docker exec -it kerberos kadmin.local -q "addprinc -randkey zkclient/quickstart.confluent.io@TEST.CONFLUENT.IO" + docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab zkclient/quickstart.confluent.io@TEST.CONFLUENT.IO" + done - For Kafka brokers, the principal should be called ``kafka``. - :: + For Kafka brokers, the principal should be called ``kafka``. + + .. 
sourcecode:: bash - for principal in broker1 broker2 broker3 - do - docker exec -it kerberos kadmin.local -q "addprinc -randkey kafka/quickstart.confluent.io@TEST.CONFLUENT.IO" - docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab kafka/quickstart.confluent.io@TEST.CONFLUENT.IO" - done + for principal in broker1 broker2 broker3 + do + docker exec -it kerberos kadmin.local -q "addprinc -randkey kafka/quickstart.confluent.io@TEST.CONFLUENT.IO" + docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab kafka/quickstart.confluent.io@TEST.CONFLUENT.IO" + done - :: + .. sourcecode:: bash - for principal in saslproducer saslconsumer - do - docker exec -it kerberos kadmin.local -q "addprinc -randkey ${principal}/quickstart.confluent.io@TEST.CONFLUENT.IO" - docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab ${principal}/quickstart.confluent.io@TEST.CONFLUENT.IO" - done + for principal in saslproducer saslconsumer + do + docker exec -it kerberos kadmin.local -q "addprinc -randkey ${principal}/quickstart.confluent.io@TEST.CONFLUENT.IO" + docker exec -it kerberos kadmin.local -q "ktadd -norandkey -k /tmp/keytab/${principal}.keytab ${principal}/quickstart.confluent.io@TEST.CONFLUENT.IO" + done 4. Run a 3-node Zookeeper ensemble with SASL enabled. 
@@ -364,7 +364,7 @@ Check the logs to see the broker has booted up successfully: -v ${KAFKA_SASL_SECRETS_DIR}:/etc/kafka/secrets \ -e KAFKA_OPTS="-Djava.security.auth.login.config=/etc/kafka/secrets/consumer_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf" \ confluentinc/cp-kafka:3.0.1 \ - kafka-console-consumer --bootstrap-server quickstart.confluent.io:29094 --topic bar --new-consumer --from-beginning --max-messages 42 --consumer.config /etc/kafka/secrets/host.consumer.ssl.sasl.config + kafka-console-consumer --bootstrap-server quickstart.confluent.io:29094 --topic bar --new-consumer --from-beginning --consumer.config /etc/kafka/secrets/host.consumer.ssl.sasl.config You should see the following (it might take some time for this command to return data. Kafka has to create the ``__consumers_offset`` topic behind the scenes when you consume data for the first time and this may take some time): @@ -400,7 +400,7 @@ Before you get started, you will first need to install `Docker `_, which is limited to a single node Kafka cluster. +In this section, we provide a tutorial for running a secure three-node Kafka cluster and Zookeeper ensemble with SSL. By the end of this tutorial, you will have successfully installed and run a simple deployment with SSL security enabled on Docker. If you're looking for a simpler tutorial, please `refer to our quickstart guide <../quickstart.html>`_, which is limited to a single node Kafka cluster. .. note:: It is worth noting that we will be configuring Kafka and Zookeeper to store data locally in the Docker containers. For production deployments (or generally whenever you care about not losing data), you should use mounted volumes for persisting data in the event that a container stops running or is restarted. This is important when running a system like Kafka on Docker, as it relies heavily on the filesystem for storing and caching messages. 
Refer to our `documentation on Docker external volumes `_ for an example of how to add mounted volumes to the host machine. + Installing & Running Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For this tutorial, we'll run docker using the Docker client. If you are interested in information on using Docker Compose to run the images, `skip to the bottom of this guide `_. +For this tutorial, we'll run docker using the Docker client. If you are interested in information on using Docker Compose to run the images, :ref:`skip to the bottom of this guide `. To get started, you'll need to first `install Docker and get it running `_. The CP Docker Images require Docker version 1.11 or greater. @@ -48,7 +49,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke 3. Generate Credentials - You will need to generate CA certificates (or use yours if you already have one) and then generate keystore and truststore for brokers and clients. You can use ``create-certs.sh`` script in ``examples/kafka-ssl-cluster/secrets`` to generate them. For production, please use these scripts for generating certificates : https://github.com/confluentinc/confluent-platform-security-tools + You will need to generate CA certificates (or use yours if you already have one) and then generate a keystore and truststore for brokers and clients. You can use the ``create-certs.sh`` script in ``examples/kafka-ssl-cluster/secrets`` to generate them. For production, please use these scripts for generating certificates: https://github.com/confluentinc/confluent-platform-security-tools For this example, we will use the ``create-certs.sh`` available in the ``examples/kafka-ssl-cluster/secrets`` directory in cp-docker-images. See "security" section for more details on security. Make sure that you have OpenSSL and JDK installed.
@@ -107,7 +108,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke -e ZOOKEEPER_SERVERS="localhost:22888:23888;localhost:32888:33888;localhost:42888:43888" \ confluentinc/cp-zookeeper:3.0.1 - Check the logs to see the broker has booted up successfully + Check the logs to confirm that the ZooKeeper servers have booted up successfully: .. sourcecode:: bash @@ -279,7 +280,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke --rm \ -v ${KAFKA_SSL_SECRETS_DIR}:/etc/kafka/secrets \ confluentinc/cp-kafka:3.0.1 \ - kafka-console-consumer --bootstrap-server localhost:29092 --topic bar --new-consumer --from-beginning --max-messages 10 --consumer.config /etc/kafka/secrets/host.consumer.ssl.config + kafka-console-consumer --bootstrap-server localhost:29092 --topic bar --new-consumer --from-beginning --consumer.config /etc/kafka/secrets/host.consumer.ssl.config You should see the following (it might take some time for this command to return data. Kafka has to create the ``__consumers_offset`` topic behind the scenes when you consume data for the first time and this may take some time): @@ -293,9 +294,9 @@ Now that we have all of the Docker dependencies installed, we can create a Docke 16 .... 41 - Processed a total of 10 messages + Processed a total of 42 messages -.. _clustered_quickstart_compose_ssl : +.. _clustered_quickstart_compose_ssl: Docker Compose: Setting Up a Three Node CP Cluster with SSL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -315,9 +316,10 @@ Before you get started, you will first need to install `Docker `_, which is limited to a single node Kafka cluster. + If you're looking for a simpler tutorial, please `refer to our quickstart guide <../quickstart.html>`_, which is limited to a single node Kafka cluster. It is worth noting that we will be configuring Kafka and Zookeeper to store data locally in the Docker containers. 
For production deployments (or generally whenever you care about not losing data), you should use mounted volumes for persisting data in the event that a container stops running or is restarted. This is important when running a system like Kafka on Docker, as it relies heavily on the filesystem for storing and caching messages. Refer to our `documentation on Docker external volumes `_ for an example of how to add mounted volumes to the host machine. Installing & Running Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For this tutorial, we'll run docker using the Docker client. If you are interested in information on using Docker Compose to run the images, `skip to the bottom of this guide `_. +For this tutorial, we'll run docker using the Docker client. If you are interested in information on using Docker Compose to run the images, :ref:`skip to the bottom of this guide `. To get started, you'll need to first `install Docker and get it running `_. The CP Docker Images require Docker version 1.11 or greater. diff --git a/docs/tutorials/connect-avro-jdbc.rst b/docs/tutorials/connect-avro-jdbc.rst index c54a9f97..77b80811 100644 --- a/docs/tutorials/connect-avro-jdbc.rst +++ b/docs/tutorials/connect-avro-jdbc.rst @@ -3,7 +3,7 @@ Kafka Connect Tutorial ---------------------- -In the `quickstart guide `_, we showed you how to get up and running with a simple file connector using Kafka Connect. In this section, we provide a somewhat more advanced tutorial in which we'll use Avro as the data format and use a JDBC Source Connector to read from a MySQL database. If you're coming from the quickstart and already have all the other services running, that's great. Otherwise, you'll need to first startup up Zookeeper, Kafka and the Schema Registry. +In the `quickstart guide <../quickstart.html>`_, we showed you how to get up and running with a simple file connector using Kafka Connect. 
In this section, we provide a somewhat more advanced tutorial in which we'll use Avro as the data format and use a JDBC Source Connector to read from a MySQL database. If you're coming from the quickstart and already have all the other services running, that's great. Otherwise, you'll need to first start up Zookeeper, Kafka and the Schema Registry. .. note:: @@ -14,7 +14,7 @@ It is worth noting that we will be configuring Kafka and Zookeeper to store data Installing & Running Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For this tutorial, we'll run Docker using the Docker client. If you are interested in information on using Docker Compose to run the images, `skip to the bottom of this guide `_. +For this tutorial, we'll run Docker using the Docker client. If you are interested in information on using Docker Compose to run the images, :ref:`skip to the bottom of this guide `. To get started, you'll need to first `install Docker and get it running `_. The CP Docker Images require Docker version 1.11 or greater. If you're running on Windows or Mac OS X, you'll need to use `Docker Machine `_ to start the Docker host. Docker runs natively on Linux, so the Docker host will be your local machine if you go that route. If you are running on Mac or Windows, be sure to allocate at least 4 GB of ram to the Docker Machine. @@ -41,7 +41,7 @@ Now that we have all of the Docker dependencies installed, we can create a Docke 2. Start up Zookeeper, Kafka, and Schema Registry. - We'll walk through each of the commands for starting up these services, but you should refer to the `quickstart guide `_ for a more detailed walkthrough. + We'll walk through each of the commands for starting up these services, but you should refer to the `quickstart guide <../quickstart.html>`_ for a more detailed walkthrough. Start Zookeeper: @@ -71,15 +71,15 @@ Now that we have all of the Docker dependencies installed, we can create a Docke Start the Schema Registry: - .. sourcecode:: bash + ..
sourcecode:: bash - docker run -d \ - --net=host \ - --name=schema-registry \ - -e SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=localhost:32181 \ - -e SCHEMA_REGISTRY_HOST_NAME=localhost \ - -e SCHEMA_REGISTRY_LISTENERS=http://localhost:8081 \ - confluentinc/cp-schema-registry:3.0.0 + docker run -d \ + --net=host \ + --name=schema-registry \ + -e SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=localhost:32181 \ + -e SCHEMA_REGISTRY_HOST_NAME=localhost \ + -e SCHEMA_REGISTRY_LISTENERS=http://localhost:8081 \ + confluentinc/cp-schema-registry:3.0.0 You can confirm that each of the services is up by checking the logs using the following command: ``docker logs ``. For example, if we run ``docker logs kafka``, we should see the following at the end of the log output: diff --git a/docs/tutorials/tutorials.rst b/docs/tutorials/tutorials.rst index 4bdf6e9d..723fb075 100644 --- a/docs/tutorials/tutorials.rst +++ b/docs/tutorials/tutorials.rst @@ -3,7 +3,7 @@ More Tutorials ============== -In this section, we provide more advanced tutorials for using specific Confluent Platform features on Docker. If you are looking for a simple tutorial, you should refer instead to the `quickstart guide `_. +In this section, we provide more advanced tutorials for using specific Confluent Platform features on Docker. If you are looking for a simple tutorial, you should refer instead to the `quickstart guide <../quickstart.html>`_. **Table of Contents** diff --git a/examples/kafka-cluster-sasl/docker-compose.yml b/examples/kafka-cluster-sasl/docker-compose.yml index fc648440..10a9d52f 100644 --- a/examples/kafka-cluster-sasl/docker-compose.yml +++ b/examples/kafka-cluster-sasl/docker-compose.yml @@ -2,7 +2,7 @@ version: '2' services: zookeeper-sasl-1: - image: confluentinc/cp-zookeeper:latest + image: confluentinc/cp-zookeeper:3.0.1 # This is required because Zookeeper can fail if kerberos is still initializing. 
restart: on-failure:3 environment: @@ -21,7 +21,7 @@ services: network_mode: host zookeeper-sasl-2: - image: confluentinc/cp-zookeeper:latest + image: confluentinc/cp-zookeeper:3.0.1 # This is required because Zookeeper can fail if kerberos is still initializing. restart: on-failure:3 environment: @@ -41,7 +41,7 @@ services: zookeeper-sasl-3: - image: confluentinc/cp-zookeeper:latest + image: confluentinc/cp-zookeeper:3.0.1 # This is required because Zookeeper can fail if kerberos is still initializing. restart: on-failure:3 environment: @@ -60,7 +60,7 @@ services: network_mode: host kerberos: - image: confluentinc/cp-kerberos + image: confluentinc/cp-kerberos:3.0.1 network_mode: host environment: BOOTSTRAP: 0 @@ -69,7 +69,7 @@ services: - /dev/urandom:/dev/random kafka-sasl-1: - image: confluentinc/cp-kafka:latest + image: confluentinc/cp-kafka:3.0.1 network_mode: host # This is required because Kafka can fail if kerberos is still initializing. restart: on-failure:3 @@ -94,7 +94,7 @@ services: - ${KAFKA_SASL_SECRETS_DIR}:/etc/kafka/secrets kafka-sasl-2: - image: confluentinc/cp-kafka:latest + image: confluentinc/cp-kafka:3.0.1 network_mode: host restart: on-failure:3 environment: @@ -118,7 +118,7 @@ services: - ${KAFKA_SASL_SECRETS_DIR}:/etc/kafka/secrets kafka-sasl-3: - image: confluentinc/cp-kafka:latest + image: confluentinc/cp-kafka:3.0.1 network_mode: host restart: on-failure:3 environment: