import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the clauses that surround numeric quantities in a line-wrapped text file.
 *
 * <p>Every input line is scanned for "number + unit" occurrences. For each occurrence the
 * clause around it is written to the output file (one clause per line) and echoed to standard
 * output with a short tag naming the code path that produced it. When a line ends with
 * digits, or when no clause-ending punctuation follows a quantity, the next input line is
 * consumed and appended before extraction.
 *
 * <p>Usage: {@code java TQ [inputFile [outputFile]]}. Without arguments the original
 * hard-coded paths are used. Files are read and written with the platform default charset.
 */
public class TQ {

    /** Input file used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH =
            "G:/Users/liuyang/Desktop/PDF2TXT//中车 【601766.SH】.txt";

    /** Output file used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "F://TQ.txt";

    /**
     * A run of digits, dots and commas, an optional whitespace character, then one or more
     * unit characters.
     *
     * <p>NOTE(review): the unit list sits inside {@code [...]}, so it is a character class: the
     * {@code |} separators are literal members and every listed unit is matched character by
     * character rather than as a whole word. Presumably an alternation was intended; left
     * unchanged because switching would change which quantities are found.
     */
    private static final Pattern QUANTITY = Pattern.compile(
            "[0-9.,]+([\\s]?)[%|元|万元|亿元|美元|英镑|千股|万股|千万股|亿股|米|千米|万千米|里|公里|千里|万千里|斤|千克|吨|列|个|项|件|马力|家|辆|次|户|人次|百人次|千人次|万人次]+");

    /** Any single clause-delimiting punctuation mark. */
    private static final Pattern PUNCTUATION = Pattern.compile("([,。;?!])");

    /** A line whose last characters are digits, dots or commas. */
    private static final Pattern TRAILING_NUMBER = Pattern.compile("([0-9.,]+$)+");

    /** Marker substituted for punctuation that precedes a quantity. */
    private static final String LEADING_MARK = "@";

    /** Marker substituted for punctuation that follows a quantity. */
    private static final String TRAILING_MARK = "#";

    /**
     * Program entry point.
     *
     * @param args optional {@code [inputFile, outputFile]}; missing entries fall back to the
     *             built-in default paths
     */
    public static void main(String[] args) {
        File inFile = new File(pathArgument(args, 0, DEFAULT_INPUT_PATH));
        File outFile = new File(pathArgument(args, 1, DEFAULT_OUTPUT_PATH));
        // try-with-resources closes (and flushes) both streams even when extraction fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(inFile));
             BufferedWriter writer = new BufferedWriter(new FileWriter(outFile))) {
            String inString;
            while ((inString = reader.readLine()) != null) {
                if (TRAILING_NUMBER.matcher(inString).find()) {
                    // The line ends with digits: join it with the next line before scanning.
                    scanJoinedText(inString + readContinuation(reader), "(ljGJC)", true, writer);
                } else {
                    scanLine(inString, reader, writer);
                }
            }
        } catch (FileNotFoundException ex) {
            System.out.println("没找到文件!");
        } catch (IOException ex) {
            System.out.println("读写文件出错!");
        }
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String pathArgument(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Reads the next line, returning an empty string (not the text "null") at end of file. */
    private static String readContinuation(BufferedReader reader) throws IOException {
        String next = reader.readLine();
        return next == null ? "" : next;
    }

    /**
     * Scans one ordinary line. A quantity followed by punctuation on the same line is emitted
     * with the tag {@code (GJC)}; otherwise the next input line is consumed, appended to the
     * text starting at the quantity, and scanned with the tag {@code (GJC21)}.
     *
     * <p>NOTE(review): matches are enumerated on the original line, but each emitted clause
     * rewrites the working copy with markers in place of punctuation. Later quantities on the
     * same line therefore usually take the continuation path and consume further input lines.
     * This mirrors the original behaviour; confirm whether it is intended.
     */
    private static void scanLine(String line, BufferedReader reader, BufferedWriter writer)
            throws IOException {
        Matcher quantities = QUANTITY.matcher(line);
        String working = line;
        while (quantities.find()) {
            String matched = quantities.group();
            int start = working.indexOf(matched);
            if (start < 0) {
                // The working copy was rewritten and no longer contains this match; skip it
                // instead of failing with StringIndexOutOfBoundsException.
                continue;
            }
            String tail = tailWithoutGroupingCommas(working, start, matched);
            if (PUNCTUATION.matcher(tail).find()) {
                working = markLeading(working.substring(0, start), false)
                        + markPunctuation(tail, TRAILING_MARK);
                emitClause(working, "(GJC)", writer);
            } else {
                scanJoinedText(tail + readContinuation(reader), "(GJC21)", false, writer);
            }
        }
    }

    /**
     * Scans text that was built by joining two input lines and emits one clause per quantity
     * that is followed by punctuation.
     *
     * @param text                      the joined text
     * @param tag                       suffix appended to the console echo only
     * @param requireLeadingPunctuation when {@code true}, a quantity with no punctuation before
     *                                  it is dropped; when {@code false}, its clause starts at
     *                                  the beginning of the text.
     *                                  NOTE(review): the digit-ending-line path passes
     *                                  {@code true}, which looks like a missing fallback branch
     *                                  in the original; kept as is.
     * @param writer                    destination for extracted clauses
     */
    private static void scanJoinedText(String text, String tag, boolean requireLeadingPunctuation,
            BufferedWriter writer) throws IOException {
        Matcher quantities = QUANTITY.matcher(text);
        String working = text;
        while (quantities.find()) {
            String matched = quantities.group();
            int start = working.indexOf(matched);
            if (start < 0) {
                continue; // match no longer present in the rewritten working copy
            }
            String tail = tailWithoutGroupingCommas(working, start, matched);
            if (!PUNCTUATION.matcher(tail).find()) {
                continue;
            }
            String head = markLeading(working.substring(0, start), requireLeadingPunctuation);
            if (head == null) {
                continue;
            }
            working = head + markPunctuation(tail, TRAILING_MARK);
            emitClause(working, tag, writer);
        }
    }

    /**
     * Returns {@code text} from {@code start} to its end, with the commas removed from every
     * occurrence of {@code matched} so that they are not later mistaken for clause punctuation.
     */
    private static String tailWithoutGroupingCommas(String text, int start, String matched) {
        return text.substring(start).replace(matched, matched.replace(",", ""));
    }

    /**
     * Marks the punctuation in the text preceding a quantity.
     *
     * @return the marked text; the text prefixed with a marker when it holds no punctuation and
     *         {@code strict} is {@code false}; {@code null} when it holds no punctuation and
     *         {@code strict} is {@code true}
     */
    private static String markLeading(String head, boolean strict) {
        if (PUNCTUATION.matcher(head).find()) {
            return markPunctuation(head, LEADING_MARK);
        }
        return strict ? null : LEADING_MARK + head;
    }

    /** Replaces every clause-delimiting punctuation mark in {@code text} with {@code marker}. */
    private static String markPunctuation(String text, String marker) {
        return text.replace("!", marker)
                .replace(",", marker)
                .replace("。", marker)
                .replace("?", marker)
                .replace(";", marker);
    }

    /**
     * Writes the clause that lies between the first leading marker and the first trailing
     * marker of {@code marked}, with any further leading markers removed. The clause is echoed
     * to standard output with {@code tag} appended.
     *
     * <p>NOTE(review): the clause starts after the FIRST punctuation mark before the quantity,
     * not the nearest one; confirm this is intended.
     */
    private static void emitClause(String marked, String tag, BufferedWriter writer)
            throws IOException {
        int from = marked.indexOf(LEADING_MARK) + 1;
        int to = marked.indexOf(TRAILING_MARK);
        if (to < from) {
            // A literal '#' ahead of the leading marker would make the range invalid; skip it.
            return;
        }
        String clause = marked.substring(from, to).replace(LEADING_MARK, "");
        System.out.println(clause + tag);
        writer.write(clause);
        writer.newLine();
    }
}