文件A.txt
1|369001|O|186600.18
2|780017|O|66219.63
3|1233140|F|270741.97
4|1367761|O|41714.38
5|444848|F|122444.33
6|556222|F|50883.96
文件B.txt
1|7759468|384484
1|3365454|365455
1|3184989|184990
2|5308487|58508
3|214849|89850
3|951772|326776
3|1468981|93984
4|4401735|276760
5|5428465|428466
5|6196340|196341
5|1876509|1513
6|6981773|106787
读取两个文件,A.txt,B.txt,两个文件的第一行为主键,当主键相等时进行连接,写文件。类似于数据库中的join连接。
A,B主键都有序,A主键重复
方案一
读取B.txt一行,然后扫描整个A.txt 主键相等时,连接
public static void main(String[] args) throws IOException {
FileInputStream A = null;
FileInputStream B = null;
InputStreamReader A_isr = null;
InputStreamReader B_isr = null;
BufferedReader A_br = null; // 用于包装InputStreamReader,提高处理性能。因为BufferedReader有缓冲的,而InputStreamReader没有。
BufferedReader B_br = null;
FileOutputStream output_fos = null;
OutputStreamWriter output_osw = null;
BufferedWriter output_bw = null;
try {
orders_tbl = new FileInputStream("D:" + File.separator + "bigfile" + File.separator + "A.txt");// FileInputStream
lineitem_tbl = new FileInputStream("D:" + File.separator + "bigfile" + File.separator + "B.txl");// FileInputStream
// 从文件系统中的某个文件中获取字节
A_isr = new InputStreamReader(A);// InputStreamReader 是字节流通向字符流的桥梁
B_isr = new InputStreamReader(B);
A_br = new BufferedReader(A_isr);// 从字符输入流中读取文件中的内容,封装了一个new InputStreamReader的对象
B_br = new BufferedReader(B_isr);
output_fos = new FileOutputStream(// 输出文件位置
new File("D:" + File.separator + "bigfile" + File.separator + "output.txt"));
output_osw = new OutputStreamWriter(output_fos, "UTF-8");
output_bw = new BufferedWriter(output_osw);
String A_line = "";
String B_line ="";
while ((A_line = A_br.readLine()) != null) {
String A_line_1[] = A_line.split("\\|", 2);
while ((B_line = B_br.readLine()) != null) {
String B_line_1[] = B_line.split("\\|", 2);
if(Integer.parseInt(A_line_1[0])==(Integer.parseInt(B_line_1[0]))) {
output_bw.write(A_line + B_line+"\n");
output_bw.flush();
}
if(Integer.parseInt(A_line_1[0])<(Integer.parseInt(B_line_1[0]))) {
break;
}
}
}
} catch (FileNotFoundException e) {
System.out.println("找不到指定文件");
} catch (IOException e) {
System.out.println("读取文件失败");
} finally {
try {
// 注意关闭的先后顺序,先打开的后关闭,后打开的先关闭
output_bw.close();
output_osw.close();
output_fos.close();
A_br.close();
B_br.close();
A_isr.close();
B_isr.close();
A.close();
B.close();
// 关闭的时候最好按照先后顺序关闭最后开的先关闭所以先关s,再关n,最后关m
} catch (IOException e) {
e.printStackTrace();
}
}
}
问题:B.txt读取一行没有问题,但是A.txt读取完成后,B.txt读取一行,A.txt已经读取完成,无法重复读取
所以求方法。(A.txt放进内存,重复读取)
方案二
投机取巧型
A.txt有序
先读取A.txt一行,再扫描B.txt,当主键相等时进行连接到同一行,不相等时候终止循环
public static void main(String[] args) throws IOException {
FileInputStream A = null;
FileInputStream B = null;
InputStreamReader A_isr = null;
InputStreamReader B_isr = null;
BufferedReader A_br = null; // 用于包装InputStreamReader,提高处理性能。因为BufferedReader有缓冲的,而InputStreamReader没有。
BufferedReader B_br = null;
FileOutputStream output_fos = null;
OutputStreamWriter output_osw = null;
BufferedWriter output_bw = null;
try {
orders_tbl = new FileInputStream("D:" + File.separator + "bigfile" + File.separator + "A.txt");// FileInputStream
lineitem_tbl = new FileInputStream("D:" + File.separator + "bigfile" + File.separator+ "B.txt");// FileInputStream
// 从文件系统中的某个文件中获取字节
A_isr = new InputStreamReader(A);// InputStreamReader 是字节流通向字符流的桥梁
B_isr = new InputStreamReader(B);
A_br = new BufferedReader(A_isr);// 从字符输入流中读取文件中的内容,封装了一个new InputStreamReader的对象
B_br = new BufferedReader(B_isr);
output_fos = new FileOutputStream(// 输出文件位置
new File("D:" + File.separator + "bigfile" + File.separator + "output.txt"));
output_osw = new OutputStreamWriter(output_fos, "UTF-8");
output_bw = new BufferedWriter(output_osw);
String A_line = "";
String B_line ="";
while ((A_line = A_br.readLine()) != null) {
while ((B_line = B_br.readLine()) != null) {
String A_line_1[] = A_line.split("\\|", 2);
String B_line_1[] = B_line.split("\\|", 2);
if (Integer.parseInt(A_line_1[0]) < Integer.parseInt(B_line_1[0])) {
break;
}
if (Integer.parseInt(A_line_1[0]) == Integer.parseInt(B_line_1[0])) {
output_bw.write(A_line + B_line+"\n");
output_bw.flush();
}
}
}
} catch (FileNotFoundException e) {
System.out.println("找不到指定文件");
} catch (IOException e) {
System.out.println("读取文件失败");
} finally {
try {
// 注意关闭的先后顺序,先打开的后关闭,后打开的先关闭
output_bw.close();
output_osw.close();
output_fos.close();
A_br.close();
B_br.close();
A_isr.close();
B_isr.close();
A.close();
B.close();
// 关闭的时候最好按照先后顺序关闭最后开的先关闭
} catch (IOException e) {
e.printStackTrace();
}
}
}
问题:A读取一行没有问题,B读取一行,当主键相等是连接写文件,但是需要读取下一行数据才能判断不相等,会造成数据丢失。