package com.yoho.trace.online.sink;

import com.alibaba.fastjson.JSONObject;
import com.yoho.trace.anaylzer.model.ApiTraceResult;
import com.yoho.trace.store.HBasePool;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Flink sink that persists assembled trace results to the HBase table {@code all_trace}.
 * Each trace is written twice, once under a row key derived from its trace MD5 and once
 * under a row key derived from its API name, so it can be looked up by either dimension.
 * <p>
 * Created by mingdan.ge on 2019/11/11.
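 * <p>
 * A minimal wiring sketch, assuming an upstream job that already produces
 * {@code Tuple2<String, ApiTraceResult>} records; the stream name and the
 * {@code buildTraceStream} helper are hypothetical:
 * <pre>{@code
 * StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
 * DataStream<Tuple2<String, ApiTraceResult>> apiTraceStream = buildTraceStream(env); // hypothetical builder
 * apiTraceStream.addSink(new AllTraceSink());
 * env.execute("all-trace-sink");
 * }</pre>
 */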
public class AllTraceSink extends RichSinkFunction<Tuple2<String, ApiTraceResult>> {
    private static final Logger LOGGER = LoggerFactory.getLogger(AllTraceSink.class);

    // Initialized in open(); transient because Flink serializes the sink instance
    // when shipping the job graph to the task managers.
    private transient Connection conn;
    private transient HTable table;

    @Override
    public void open(Configuration parameters) throws Exception {
        conn = HBasePool.getConnection();
        table = (HTable) conn.getTable(TableName.valueOf("all_trace"));
        // Buffer up to 20 MB of puts on the client before flushing to the region servers.
        table.setWriteBufferSize(1024 * 1024 * 20);
        // Disable per-put flushing; writes go out in batches when the buffer fills.
        table.setAutoFlush(false, true);
    }
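
    // HBasePool is this project's connection helper. A minimal sketch of what it is
    // assumed to provide (hypothetical, for illustration only): one process-wide
    // HBase Connection built from the hbase-site.xml on the classpath.
    //
    //   public final class HBasePool {
    //       private static volatile Connection connection;
    //       public static synchronized Connection getConnection() throws IOException {
    //           if (connection == null) {
    //               connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
    //           }
    //           return connection;
    //       }
    //   }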

    @Override
    public void invoke(Tuple2<String, ApiTraceResult> value, SinkFunction.Context context) throws Exception {
        ApiTraceResult apiTraceResult = value.f1;

        // traceMd5 is expected to look like "<prefix>.<md5>"; the second segment
        // becomes the row key prefix. Records that do not match are skipped
        // (assumption: a malformed key is not worth failing the whole task over).
        String[] md5Tags = StringUtils.split(apiTraceResult.getTraceMd5(), '.');
        if (md5Tags == null || md5Tags.length < 2) {
            return;
        }

        long startSeconds = apiTraceResult.getTraceStartTime() / 1000;

        // Row key layout 1: <md5>:<startSeconds>:<traceId>, for lookups by trace signature.
        Put md5Put = buildPut(md5Tags[1] + ":" + startSeconds + ":" + apiTraceResult.getTraceId(), apiTraceResult);
        // Row key layout 2: <apiName>:<startSeconds>:<traceId>, for lookups by API name.
        Put apiPut = buildPut(apiTraceResult.getApiName() + ":" + startSeconds + ":" + apiTraceResult.getTraceId(), apiTraceResult);

        table.put(md5Put);
        table.put(apiPut);
    }

    /**
     * Builds the Put written under both row key layouts; the two rows carry
     * identical column data.
     */
    private Put buildPut(String rowKey, ApiTraceResult result) {
        Put put = new Put(Bytes.toBytes(rowKey));
        // Disabling the WAL is generally not recommended, but it does improve write
        // throughput: HBase normally appends to the WAL before the memstore so that
        // unflushed edits can be recovered after a failure. With SKIP_WAL, edits
        // still in the memstore are lost if a region server crashes.
        put.setDurability(Durability.SKIP_WAL);
        put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("spans"), Bytes.toBytes(JSONObject.toJSONString(result.getSpans())));
        put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("traceid"), Bytes.toBytes(result.getTraceId()));
        put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("starttime"), Bytes.toBytes(result.getTraceStartTime() / 1000));
        put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("traceMd5"), Bytes.toBytes(result.getTraceMd5()));
        put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("duration"), Bytes.toBytes(result.getDuration()));
        if (StringUtils.isNotEmpty(result.getRegion())) {
            put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("region"), Bytes.toBytes(result.getRegion()));
        }
        return put;
    }
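
    // The dual row keys exist so rows can be range-scanned by either trace signature
    // or API name. A read-side sketch (hypothetical, not part of this job), scanning
    // one API's traces via the second key layout; the API name is made up:
    //
    //   Scan scan = new Scan();
    //   scan.setRowPrefixFilter(Bytes.toBytes("some.api.name:")); // hypothetical API name
    //   try (ResultScanner scanner = table.getScanner(scan)) {
    //       for (Result r : scanner) {
    //           byte[] spans = r.getValue(Bytes.toBytes("data"), Bytes.toBytes("spans"));
    //           // deserialize and inspect the trace...
    //       }
    //   }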

    @Override
    public void close() throws Exception {
        try {
            if (table != null) {
                // HTable.close() flushes any puts still sitting in the client write buffer.
                table.close();
            }
        } catch (IOException e) {
            LOGGER.error("failed to close HBase table all_trace", e);
        } finally {
            super.close();
            if (conn != null) {
                conn.close();
            }
        }
    }
}