1.在hadoop中必须先将对象序列化,才能进行网络传输。不使用java原生序列化的原因是:java序列化会在对象之外附加很多额外信息(类继承体系、校验信息等),导致序列化后的字节数变大;而hadoop序列化是一种紧凑、高效的序列化方式,更适合大数据量的网络传输。
如果在实际生产中,BooleanWritable、IntWritable、Text、FloatWritable等内置数据类型无法满足要求,就需要自定义Bean对象并实现Writable接口,使其支持hadoop序列化,如下例所示:
package com.hxy.mr.xuliehua;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom Hadoop-serializable bean carrying an upstream flow count, a
 * downstream flow count, and their sum.
 *
 * <p>Implements {@link Writable} so instances can be shuffled between
 * MapReduce tasks using Hadoop's compact wire format instead of Java
 * native serialization.
 */
public class FlowBean implements Writable {

    private Long upFlow;   // upstream traffic
    private Long downFlow; // downstream traffic
    private Long sumFlow;  // upFlow + downFlow, precomputed in the two-arg constructor

    /** No-arg constructor required so Hadoop can instantiate the bean reflectively before readFields(). */
    public FlowBean() {
    }

    /**
     * Builds a bean from the two raw counters; the sum is derived eagerly.
     *
     * @param upFlow   upstream traffic count
     * @param downFlow downstream traffic count
     */
    public FlowBean(Long upFlow, Long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    /**
     * Serialization: writes the three counters in a fixed order.
     * The order chosen here defines the wire format.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    /**
     * Deserialization: fields MUST be read back in exactly the order
     * {@link #write(DataOutput)} emitted them — the serialized stream is
     * consumed first-in-first-out, like a pipe.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    /** Tab-separated rendering used directly as MapReduce text output. */
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public Long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(Long upFlow) {
        this.upFlow = upFlow;
    }

    public Long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(Long downFlow) {
        this.downFlow = downFlow;
    }

    public Long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(Long sumFlow) {
        this.sumFlow = sumFlow;
    }
}