0% found this document useful (0 votes)
12 views

Apache Kafka

Uploaded by

vedaxew561
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views

Apache Kafka

Uploaded by

vedaxew561
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

MODULE 6 APACHE KAFKA

1. Distributed reporting service using Apache Kafka

- Pull information from data source

- Send it to other users via email

SOLUTION:

Prerequisites:

Install Apache Kafka and have it running.

Step 1: Set Up Kafka Topics

Create Kafka topics for the data you want to report.

kafka-topics.sh --bootstrap-server localhost:9092 --topic report_doc --create --partitions 3 --replication-factor 1

Step 2: Data Producer

Create a simple Python script to act as a data producer. This script will simulate pulling information
from a data source and publishing it to the Kafka topic.

from kafka import KafkaProducer

import json

import mysql.connector

from datetime import date, datetime

from decimal import Decimal

def fetch_data_from_mysql_with_column_names():
    """Read every row of the ``Employees`` table along with its column names.

    Returns:
        A ``(rows, column_names)`` tuple. ``rows`` is whatever
        ``cursor.fetchall()`` returns for ``SELECT * FROM Employees``;
        ``column_names`` holds the first field of each entry in
        ``cursor.description``, in result-set order.
    """
    # NOTE(review): host and credentials are hard-coded in the tutorial;
    # consider loading them from configuration or the environment.
    connection = mysql.connector.connect(
        host='localhost',
        user='gow',
        password='Gow2210***',
        database='employee',
    )
    try:
        cursor = connection.cursor()
        try:
            cursor.execute('SELECT * FROM Employees')
            rows = cursor.fetchall()
            # description is populated by execute(); item 0 is the column name.
            column_names = [desc[0] for desc in cursor.description]
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        connection.close()
    return rows, column_names

def produce_to_kafka(rows, topic):
    """Publish each element of ``rows`` as one JSON message on ``topic``.

    Args:
        rows: Iterable of JSON-serializable objects; each one becomes the
            value of a single Kafka message.
        topic: Name of the Kafka topic to publish to.
    """
    producer = KafkaProducer(
        bootstrap_servers='localhost:9092',
        # Values are serialized to UTF-8 encoded JSON before sending.
        value_serializer=lambda x: json.dumps(x).encode('utf-8'),
    )
    try:
        for row in rows:
            producer.send(topic, value=row)
        # Flush once after the loop so every queued message is handed to
        # the broker before the producer is closed.
        producer.flush()
    finally:
        # Close the producer even if sending or flushing raised.
        producer.close()

def convert_to_serializable(rows, column_names):
    """Turn database rows into a list of JSON-friendly dictionaries.

    Each row is paired positionally with ``column_names`` (extra items on
    either side are dropped, as ``zip`` does) and converted as follows:

    * ``date`` / ``datetime`` values become ISO 8601 strings;
    * ``float`` and ``Decimal`` values become plain ``float``;
    * every other value is passed through unchanged.

    Args:
        rows: Iterable of row sequences (e.g. tuples from ``fetchall()``).
        column_names: Sequence of column names, in the same order as the
            values inside each row.

    Returns:
        A list with one ``{column_name: value}`` dict per input row.
    """

    def _to_json_value(value):
        # Convert a single cell to something json.dumps can encode.
        if isinstance(value, (date, datetime)):
            return value.isoformat()
        if isinstance(value, (float, Decimal)):
            # Decimal is not JSON-serializable; float keeps it numeric
            # (use str(value) instead if exact precision matters).
            return float(value)
        return value

    return [
        {name: _to_json_value(value) for name, value in zip(column_names, row)}
        for row in rows
    ]

if __name__ == "__main__":
    # Producer entry point: read the table, make the rows JSON-friendly,
    # then publish one Kafka message per row.
    topic = 'report_doc'
    data, column_names = fetch_data_from_mysql_with_column_names()
    serializable_data = convert_to_serializable(data, column_names)
    produce_to_kafka(serializable_data, topic)
Step 3: Data Consumer (Reporting Service)

Create another Python script to act as a data consumer, which will pull information from the Kafka
topic and process it for reporting purposes.

from kafka import KafkaConsumer

import json

import smtplib

from email.mime.multipart import MIMEMultipart

from email.mime.text import MIMEText

def send_email(sender_email, sender_password, receiver_email, subject, body):
    """Send ``body`` as a plain-text + HTML e-mail via Gmail's SMTP server.

    Args:
        sender_email: Address used both as the ``From`` header and as the
            SMTP login name.
        sender_password: Password used for the SMTP login.
        receiver_email: Address used as the ``To`` header and recipient.
        subject: Value of the ``Subject`` header.
        body: Message text; sent as-is in the plain-text part and
            HTML-escaped inside a ``<p>`` element in the HTML part.

    Errors raised by ``smtplib`` (connection, TLS, login, send) are not
    caught here and propagate to the caller.
    """
    # Local import keeps this block self-contained; the function-level
    # name avoids clashing with the ``html`` variable used below.
    from html import escape

    # Create message container - the correct MIME type is multipart/alternative
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = receiver_email

    # Create the body of the message (a plain-text and an HTML version).
    # The HTML version escapes the text so characters such as <, & or
    # quotes in the data cannot break or inject markup.
    text = body
    html = f"""\
<html>
<body>
<p>{escape(body)}</p>
</body>
</html>
"""

    # Attach both parts to the MIME message; the plain-text part goes
    # first and the HTML part last.
    part1 = MIMEText(text, 'plain')
    part2 = MIMEText(html, 'html')
    msg.attach(part1)
    msg.attach(part2)

    # Send the message via SMTP server; the context manager closes the
    # connection when the block exits.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, receiver_email, msg.as_string())

def consume_and_send_emails(topic, sender_email, sender_password, receiver_email):
    """Consume JSON messages from ``topic`` and e-mail each one.

    For every message received, the decoded value is pretty-printed as
    JSON and passed to ``send_email``. Iterating the consumer blocks, so
    this function runs until the consumer stops or an error is raised.

    Args:
        topic: Kafka topic to subscribe to.
        sender_email: Sender address, forwarded to ``send_email``.
        sender_password: Sender password, forwarded to ``send_email``.
        receiver_email: Recipient address, forwarded to ``send_email``.
    """
    # NOTE(review): no group_id is passed; together with
    # auto_offset_reset='earliest' this presumably re-reads the topic from
    # the start on every run (and re-sends every e-mail) - confirm.
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers='localhost:9092',
        auto_offset_reset='earliest',  # Start consuming from the beginning of the topic
        value_deserializer=lambda x: json.loads(x.decode('utf-8')),
    )
    subject = 'New Employee Data Received'  # same for every message
    try:
        for message in consumer:
            data = message.value
            body = f"New employee data received:\n{json.dumps(data, indent=4)}"
            send_email(sender_email, sender_password, receiver_email, subject, body)
    finally:
        # Release the consumer's resources however the loop ends.
        consumer.close()

if __name__ == "__main__":
    # Consumer entry point: e-mail every record published on the topic.
    topic = 'report_doc'

    # NOTE(review): the addresses below appear as redacted placeholders in
    # this copy of the document - replace them with real addresses.
    # Avoid committing real credentials; prefer configuration or the
    # environment.
    sender_email = '[email protected]'
    sender_password = 'lmdn uvrc'
    receiver_email = '[email protected]'

    consume_and_send_emails(topic, sender_email, sender_password, receiver_email)

You might also like