最近从网上爬下一些音乐网页,用 HTMLParser 分析的时候出现了乱码问题,好不容易解决以后,获得了一些音乐信息的txt文件。然而在建立索引的时候,需要把这些txt文件的文件名(由歌手和歌名两部分组成)截取出来写入另一个txt文件,此时竟然又出现了乱码问题。
控制台输出的结果没有任何问题,
但是在word.txt文件里中文是乱码的
做了各种尝试,好像都没奏效……
/**
 * Builds a sorted vocabulary file from the names of text files found in a set of directories.
 *
 * <p>Every file name is expected to look like {@code <name>-<type>.txt}. The text before the
 * first {@code '-'} and the text between that {@code '-'} and the first {@code ".txt"} are
 * collected without duplicates, sorted in natural string order, and written one per line to the
 * configured vocabulary file.
 *
 * <p>The vocabulary file is always written as UTF-8. The previous implementation used
 * {@code FileWriter}, which encodes with the platform default charset and therefore produced
 * garbled output for non-ASCII (e.g. Chinese) names on platforms whose default is not UTF-8.
 */
public class BuildWordVacabulary {
    /** Path of the vocabulary file to write; must be set before loading. */
    private String wordvacabularypath = null;

    /** Distinct words collected so far; accumulates across calls on the same instance. */
    private ArrayList<String> wordlist = new ArrayList<String>();

    /**
     * Scans the given directories, extracts the two name parts of every entry, and writes the
     * sorted, de-duplicated result to the vocabulary file.
     *
     * <p>Directories that are missing or cannot be listed, and entries whose names do not match
     * the {@code <name>-<type>.txt} shape, are reported on {@code System.err} and skipped so that
     * one bad entry does not prevent the vocabulary file from being written.
     *
     * @param dirs directories to scan; a {@code null} array or {@code null} elements are ignored
     * @throws IOException if the vocabulary path has not been set, or if writing the file fails
     */
    public void loadProductFromDirectory(String[] dirs) throws IOException {
        if (wordvacabularypath == null) {
            throw new IOException("Vocabulary output path has not been set");
        }
        if (dirs != null) {
            for (String folder : dirs) {
                if (folder == null) {
                    continue;
                }
                // File.list() returns null (not an empty array) for a missing or unreadable
                // directory.
                String[] files = new File(folder).list();
                if (files == null) {
                    System.err.println("Skipping missing or unreadable directory: " + folder);
                    continue;
                }
                for (String fileName : files) {
                    collectWords(fileName);
                }
            }
        }
        Collections.sort(wordlist);
        writeToFile();
    }

    /** Splits one file name into its two parts and records each part that is not yet known. */
    private void collectWords(String fileName) {
        int dash = fileName.indexOf("-");
        int ext = fileName.indexOf(".txt");
        // Without a '-' followed later by ".txt" the substring calls below would throw.
        if (dash < 0 || ext < dash) {
            System.err.println("Skipping file with unexpected name: " + fileName);
            return;
        }
        // Part after the dash (the singer, according to the original comment).
        addIfAbsent(fileName.substring(dash + 1, ext));
        // Part before the dash (the song title, according to the original comment).
        addIfAbsent(fileName.substring(0, dash));
    }

    /** Appends {@code word} to the word list unless it is already present. */
    private void addIfAbsent(String word) {
        if (!wordlist.contains(word)) {
            wordlist.add(word);
        }
    }

    /**
     * Writes every collected word on its own line to the vocabulary file, encoded as UTF-8, and
     * echoes each word to standard output.
     *
     * @throws IOException if the file cannot be opened or written
     */
    private void writeToFile() throws IOException {
        // Explicit UTF-8 instead of FileWriter's platform-default charset; try-with-resources
        // closes (and flushes) the writer even when a write fails.
        try (BufferedWriter writer = new BufferedWriter(new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(wordvacabularypath),
                java.nio.charset.StandardCharsets.UTF_8))) {
            for (String word : wordlist) {
                writer.write(word);
                writer.newLine();
                System.out.println(word);
            }
        }
    }

    /**
     * Sets the path of the vocabulary file that {@link #loadProductFromDirectory} writes.
     *
     * @param wordvacabularypath destination file path
     */
    public void setWordvacabularyPath(String wordvacabularypath) {
        this.wordvacabularypath = wordvacabularypath;
    }

    /**
     * Example entry point: scans the mp3 info directory and writes {@code word.txt}.
     *
     * @param args unused
     * @throws IOException if the vocabulary file cannot be written
     */
    public static void main(String[] args) throws IOException {
        BuildWordVacabulary builder = new BuildWordVacabulary();
        builder.setWordvacabularyPath("//root//Documents//word.txt"); // output vocabulary file
        builder.loadProductFromDirectory(new String[] { "//root//Documents//mp3//" }); // directory to scan
    }
}