Sink之Kafka存储
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.{
FlinkKafkaProducer, KafkaSerializationSchema}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.ByteArraySerializer
import org.apache.flink.streaming.api.scala._
import java.lang
import java.util.Properties
/**
 * Streaming word count that reads text lines from a socket and publishes the
 * running counts to Kafka.
 *
 * Every output record is written to topic `t_topic` with an empty key and a
 * value of the form `<word>_<count>`.
 */
object KafkaSink {
  def main(args: Array[String]): Unit = {
    // Single definition of the target topic, shared by the producer and the records it builds.
    val topic = "t_topic"

    val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Run every operator of this job with a parallelism of 1.
    streamEnv.setParallelism(1)
    // The EXACTLY_ONCE producer commits its Kafka transactions when a checkpoint
    // completes, so checkpointing has to be switched on for records to be committed.
    streamEnv.enableCheckpointing(5000L)

    // Read the live text stream (e.g. fed by netcat).
    val stream1: DataStream[String] = streamEnv.socketTextStream("127.0.0.1", 6666)

    // Split lines into words and keep a running count per word.
    val ds = stream1.flatMap(_.split(" "))
      .map((_, 1))
      .keyBy(_._1) // key selector instead of the deprecated positional keyBy(0)
      .sum(1)

    // Shape each count as (key, value); the key is deliberately left empty.
    val result = ds.map(line => ("", s"${line._1}_${line._2}"))

    // Kafka producer configuration.
    val props = new Properties()
    props.setProperty("bootstrap.servers", "172.16.254.4:9092,172.16.254.5:9092,172.16.254.6:9092")
    props.setProperty("key.serializer", classOf[ByteArraySerializer].getName)
    props.setProperty("value.serializer", classOf[ByteArraySerializer].getName)
    // Keep the transaction timeout (5 min) below the broker's default
    // transaction.max.timeout.ms (15 min); the connector's own default of 1 h is
    // rejected by a broker running with default settings.
    props.setProperty("transaction.timeout.ms", (5 * 60 * 1000).toString)

    // Serializes each (key, value) pair into a Kafka record using an explicit charset.
    val schema = new KafkaSerializationSchema[(String, String)] {
      override def serialize(element: (String, String), aLong: lang.Long): ProducerRecord[Array[Byte], Array[Byte]] = {
        val utf8 = java.nio.charset.StandardCharsets.UTF_8
        new ProducerRecord[Array[Byte], Array[Byte]](topic, element._1.getBytes(utf8), element._2.getBytes(utf8))
      }
    }

    // Write the key/value records to Kafka with exactly-once semantics.
    result.addSink(new FlinkKafkaProducer[(String, String)](
      topic, schema, props, FlinkKafkaProducer.Semantic.EXACTLY_ONCE))

    streamEnv.execute()
  }
}
结果:
Sink之Redis存储
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{
RedisCommand, RedisCommandDescription, RedisMapper}
import org.apache.flink.streaming.api.scala._
/**
 * Streaming word count that reads comma-separated words from a socket and
 * stores the running counts in Redis with HSET under the key `t_wc`
 * (field = word, value = count).
 */
object RedisSink {
  def main(args: Array[String]): Unit = {
    // Set up the Flink streaming execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1)

    // Read the input lines.
    val stream = streamEnv.socketTextStream("127.0.0.1", 6666)

    // Split lines into words and keep a running count per word.
    val result = stream.flatMap(_.split(","))
      .map((_, 1))
      .keyBy(_._1) // key selector instead of the deprecated positional keyBy(0)
      .sum(1)

    // Redis connection settings.
    val config = new FlinkJedisPoolConfig.Builder()
      .setDatabase(1)
      .setHost("127.0.0.1")
      .setPort(6379)
      .build()

    // Maps each (word, count) pair onto an HSET command.
    val mapper = new RedisMapper[(String, Int)] {
      // "t_wc" is the additional key passed along with HSET.
      override def getCommandDescription: RedisCommandDescription =
        new RedisCommandDescription(RedisCommand.HSET, "t_wc")

      // The word.
      override def getKeyFromData(data: (String, Int)): String = data._1

      // How many times the word has occurred so far.
      override def getValueFromData(data: (String, Int)): String = data._2.toString
    }

    // This object shares its simple name with the connector's sink class, so the
    // class is referenced by its fully-qualified name to keep the intent unambiguous.
    result.addSink(
      new org.apache.flink.streaming.connectors.redis.RedisSink[(String, Int)](config, mapper))

    streamEnv.execute()
  }
}
结果展示:
Sink之自定义存储(MySQL)
import com.bjsxt.practice.source.StationLog
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{
RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import java.sql.{
Connection, DriverManager, PreparedStatement}
/**
 * Custom sink that writes each [[StationLog]] record into the MySQL table
 * `t_station_log` through a single JDBC connection and prepared statement.
 */
class MyCustomSink extends RichSinkFunction[StationLog] {
  // Assigned in open(); both remain null if open() has not run or failed early.
  var conn: Connection = _
  var pst: PreparedStatement = _

  /**
   * Lifecycle hook called when the sink is initialised: opens the connection
   * and prepares the insert statement reused by every invoke() call.
   */
  override def open(parameters: Configuration): Unit = {
    conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/traffic_monitor?characterEncoding=utf-8&useSSL=false", "root", "mysql")
    pst = conn.prepareStatement("insert into t_station_log (sid,call_out,call_in,call_type,call_time,duration) values (?,?,?,?,?,?)")
  }

  /**
   * Inserts one record into `t_station_log`; called once per incoming element.
   *
   * @param value   the record to persist
   * @param context sink context supplied by the caller (not used here)
   */
  override def invoke(value: StationLog, context: SinkFunction.Context[_]): Unit = {
    pst.setString(1, value.sid)
    pst.setString(2, value.callOut)
    pst.setString(3, value.callIn)
    pst.setString(4, value.callType)
    pst.setLong(5, value.callTime)
    pst.setLong(6, value.duration)
    pst.executeUpdate()
  }

  /**
   * Releases the JDBC resources. Null checks cover the case where open() failed
   * before a field was assigned, and the finally block guarantees the connection
   * is closed even if closing the statement throws.
   */
  override def close(): Unit = {
    try {
      if (pst != null) pst.close()
    } finally {
      if (conn != null) conn.close()
    }
  }
}
/**
 * Reads comma-separated station-log lines from a socket, parses them into
 * [[StationLog]] records and writes them to MySQL through [[MyCustomSink]].
 */
object CustomJdbcSink {
  def main(args: Array[String]): Unit = {
    // Set up the Flink streaming execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1)

    val data: DataStream[String] = streamEnv.socketTextStream("127.0.0.1", 6666)

    // Parse each line into a StationLog. A line with fewer than six fields or a
    // non-numeric value in the fifth/sixth field is reported and skipped, so one
    // bad line (e.g. an empty one) does not fail the whole job.
    val result: DataStream[StationLog] = data.flatMap { (line: String) =>
      val fields = line.split(",")
      val parsed = scala.util.Try(
        StationLog(
          fields(0), fields(1), fields(2), fields(3),
          // trim so stray whitespace or a trailing '\r' does not break number parsing
          fields(4).trim.toLong, fields(5).trim.toLong)
      ).toOption
      if (parsed.isEmpty) Console.err.println(s"Skipping malformed input line: $line")
      parsed.toList
    }

    // Write the records to MySQL.
    result.addSink(new MyCustomSink)
    streamEnv.execute()
  }
}
结果:
t_station_log建表语句:
-- Table that receives the rows inserted by MyCustomSink
-- (columns: sid, call_out, call_in, call_type, call_time, duration).
-- NOTE(review): the sink binds call_time and duration with setLong, while both
-- columns are declared varchar(255) here; confirm whether a numeric type such as
-- BIGINT was intended.
CREATE TABLE `t_station_log` (
`sid` varchar(255) DEFAULT NULL,
`call_out` varchar(255) DEFAULT NULL,
`call_in` varchar(255) DEFAULT NULL,
`call_type` varchar(255) DEFAULT NULL,
`call_time` varchar(255) DEFAULT NULL,
`duration` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
文章评论