1、模拟数据
package com.chb.flink.combat.ch1
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.{Properties, Random}
/**
 * Generates mock user-behavior records and publishes them to Kafka.
 *
 * Each record is a CSV line of the form
 * `user_id,item_id,category_id,action,event_time`. The first four fields are picked
 * at random from small fixed pools; `event_time` is the current local date-time
 * formatted as `yyyy-MM-dd HH:mm:ss`.
 */
object MockData2Kafka {

  /**
   * Sends one random record to the `user_behavior` topic, then sleeps 300 ms, and
   * repeats until the loop is aborted by an exception.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Fixed value pools the random records are drawn from.
    val users = Array(1, 2, 3, 4, 5, 6)
    val itemIds = Array(1001, 1002, 1003, 1004)
    val categoryIds = Array(10001, 10002, 10003, 10004)
    val actions = Array("pv", "buy", "cart", "fav")

    val kafkaProps = new Properties()
    kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "chb1:9092")
    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

    val producer = new KafkaProducer[String, String](kafkaProps)
    val topic = "user_behavior"
    val random = new Random()
    // The formatter does not change between iterations, so build it once.
    val timeFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")

    try {
      while (true) {
        // Draw the fields in a fixed order: user, item, category, action.
        val user = users(random.nextInt(users.length))
        val item = itemIds(random.nextInt(itemIds.length))
        val category = categoryIds(random.nextInt(categoryIds.length))
        val action = actions(random.nextInt(actions.length))
        val eventTime = LocalDateTime.now().format(timeFormat)

        // Interpolation avoids the Int + String concatenation deprecated in Scala 2.13.
        val value = s"$user,$item,$category,$action,$eventTime"
        producer.send(new ProducerRecord[String, String](topic, value))
        Thread.sleep(300)
      }
    } finally {
      // Release the producer if the loop is aborted by an exception (e.g. a failed
      // send or an interrupted sleep) instead of leaking the client.
      producer.close()
    }
  }
}
csv格式:
user_id,item_id,category_id,action,event_time
4,1002,10001,buy,2022-10-17 19:25:10
6,1002,10004,buy,2022-10-17 19:25:11
5,1004,10004,cart,2022-10-17 19:25:11
1,1004,10004,buy,2022-10-17 19:25:11
5,1004,10001,fav,2022-10-17 19:25:12
5,1001,10001,pv,2022-10-17 19:25:12
1,1004,10002,fav,2022-10-17 19:25:12
1,1004,10003,cart,2022-10-17 19:25:13
2,1004,10001,buy,2022-10-17 19:25:13