cd docker
docker-machine up docker-vmdocker-machine env docker-vmUse this at relevant places.
docker-compose up -d mysqldocker exec -it my-mysql bash -c "mysql -uroot -p -e \"GRANT ALL on demo.* to 'demo'@'%' identified by 'demo';CREATE DATABASE demo;SELECT User FROM mysql.user;\""
docker exec -it my-mysql bash -c "mysql -uroot -p -e \"GRANT ALL on maxwell.* to 'maxwell'@'%' identified by 'maxwell';GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE on *.* to 'maxwell'@'%';SELECT User FROM mysql.user;\""docker-compose up -d kafkadocker-compose run maxwell bash -c "bin/maxwell --user=maxwell --password=maxwell --host=mysql --producer=kafka --kafka.bootstrap.servers=kafka:9092"docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-topics.sh --zookeeper localhost:2181 --list"docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic maxwell"cd data-generators/ruby
bundle exec ruby create_users.rbYou will have created 1000 records each on demo.A and demo.B
Now, let us look at how we can do DB denormalization while stream processing
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-topics.sh --create --topic events --zookeeper localhost:2181 --partitions 1 --replication-factor 1"bundle exec ruby create_events.rbEverytime you run create_events.rb 1000 arbitrary events will be logged of type {"aid":803,"bid":205}.
cd data-processors/stream-processors/samza
bin/grid install yarn
bin/grid start yarnOnce YARN is up, you should be able to access http://localhost:8088/cluster.
mvn clean package
mkdir -p deploy/samza
tar -xvf ./target/db-caching-0.0.1-dist.tar.gz -C deploy/samza
deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/stream-db-denormalization.propertiesWait for job to get into RUNNING status on YARN.
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic db-de-normalized-eventsLet this shell remain open.
bundle exec ruby create_events.rbdeploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/db-to-changelog.propertiesdocker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic stream-de-normalized-events"docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic db-de-normalized-events"bundle exec ruby create_events.rbNote the difference in speed.
Enjoy!