实惨,只是为了用 Java 读取一个超大的 CSV 文件。
下面的代码读一读小文件是可以的,但是我要处理的文件接近 10 GB。
package text;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
//import text1.InputStreamReader;
/**
 * Streams a CSV file line by line and prints the last three comma-separated
 * fields of every row.
 *
 * <p>Only the current line is held in memory at any time, so the amount of
 * memory needed does not grow with the size of the file.
 *
 * <p>Limitation: rows are split on every comma; quoted fields that themselves
 * contain commas are not supported.
 */
public class a {

    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_CSV_PATH = "D:\\BaiduNetdiskDownload\\1111.csv";

    /** Number of trailing columns printed for each row. */
    private static final int TRAILING_COLUMNS = 3;

    /**
     * Reads the CSV file and prints one {@code ['first','mid','last']} line per row.
     *
     * <p>Rows with fewer than three fields are reported on {@code System.err}
     * and skipped instead of aborting the whole run.
     *
     * @param args optional; when present, {@code args[0]} is the path of the CSV
     *             file to read, otherwise {@link #DEFAULT_CSV_PATH} is used
     */
    public static void main(String[] args) {
        String csvPath = (args != null && args.length > 0) ? args[0] : DEFAULT_CSV_PATH;

        // try-with-resources closes the reader even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(csvPath))) {
            // reader.readLine(); // uncomment to skip a header row
            long lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                // Limit -1 keeps trailing empty columns, so "last three" always
                // refers to the same columns even when a row ends with commas.
                String[] item = line.split(",", -1);
                if (item.length < TRAILING_COLUMNS) {
                    System.err.println("Skipping line " + lineNumber + ": expected at least "
                            + TRAILING_COLUMNS + " fields but found " + item.length);
                    continue;
                }
                String first = item[item.length - 3];
                String mid = item[item.length - 2];
                String last = item[item.length - 1];
                // int value = Integer.parseInt(last); // convert here if the column is numeric
                System.out.println("['" + first + "','" + mid + "','" + last + "']");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
下面是 InputStreamReader 的四个重载构造方法,源码分析的原文见:
https://blog.csdn.net/weixin_34050005/article/details/89654956
//创建一个使用默认字符集的InputStreamReader
public InputStreamReader(InputStream in) {
super(in);//Reader的lock是InputStream
try {
sd = StreamDecoder.forInputStreamReader(in, this, (String)null); // ## check lock object
} catch (UnsupportedEncodingException e) {
// The default encoding should always be available默认的字符集总是有效的,所以无参构造不会抛出
throw new Error(e);
}
}
/**
 * Creates an InputStreamReader that uses the charset with the given name.
 *
 * @param in          the byte stream to read from
 * @param charsetName the name of the charset to decode with
 * @throws UnsupportedEncodingException declared for the charset lookup done
 *                                      by the stream decoder
 * @throws NullPointerException         if {@code charsetName} is null
 */
public InputStreamReader(InputStream in, String charsetName)
        throws UnsupportedEncodingException {
    super(in);
    if (charsetName == null) {
        throw new NullPointerException("charsetName");
    }
    sd = StreamDecoder.forInputStreamReader(in, this, charsetName);
}
/**
 * Creates an InputStreamReader that uses the given charset.
 *
 * @param in the byte stream to read from
 * @param cs the charset to decode with
 * @throws NullPointerException if {@code cs} is null
 */
public InputStreamReader(InputStream in, Charset cs) {
    super(in);
    if (cs == null) {
        throw new NullPointerException("charset");
    }
    sd = StreamDecoder.forInputStreamReader(in, this, cs);
}
/**
 * Creates an InputStreamReader that uses the given charset decoder.
 *
 * @param in  the byte stream to read from
 * @param dec the charset decoder to decode with
 * @throws NullPointerException if {@code dec} is null
 */
public InputStreamReader(InputStream in, CharsetDecoder dec) {
    super(in);
    if (dec == null) {
        throw new NullPointerException("charset decoder");
    }
    sd = StreamDecoder.forInputStreamReader(in, this, dec);
}
getEncoding
https://www.cnblogs.com/duanxz/p/4874712.html
我看了这篇文章,文中给出了两类读取超大文件的方式:
一类是使用 BufferedReader 类读写超大文件;
另一类是使用 RandomAccessFile 类读取。作者经过比较,最后使用了前一种方式:
// Open the file as a buffered byte stream. BufferedInputStream is a subclass of
// InputStream, so it can be passed to every InputStreamReader constructor.
// NOTE(review): `file` is not defined in this snippet; it comes from the quoted article.
BufferedInputStream fis = new BufferedInputStream(new FileInputStream(file));
// Decode the bytes as UTF-8 and read the text through a BufferedReader whose
// buffer holds 5 * 1024 * 1024 chars.
BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);
好了,重头戏来了,大家细细品一品,InputStreamReader()能接收的是哪些参数?
InputStreamReader(InputStream in)
InputStreamReader(InputStream in, String charsetName)
InputStreamReader(InputStream in, Charset cs)
public InputStreamReader(InputStream in, CharsetDecoder dec)
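这里有 BufferedInputStream 这个参数类型吗?(其实有:BufferedInputStream 是 InputStream 的子类,可以直接传入。编译报错的真正原因是我传进去的是 BufferedReader——它是 Reader,不是 InputStream。)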
好的,其实你通篇看下来也只是看了个寂寞,因为问题还没有解决——目前这段代码只能用来读小文件。
如果有大佬路过,能够指导一二那就太好了。
package text;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
//import text1.InputStreamReader;
/**
 * Streams a UTF-8 encoded CSV file line by line through a large read buffer
 * and prints the last three comma-separated fields of every row.
 *
 * <p>Only the current line (plus the read buffer) is held in memory, so the
 * amount of memory needed does not grow with the size of the file.
 *
 * <p>Why the earlier version did not compile: it reported
 * "The constructor InputStreamReader(BufferedReader, String) is undefined"
 * because a {@code BufferedReader} (a {@code Reader}) was passed where
 * {@code InputStreamReader} requires an {@code InputStream}. Copying the JDK
 * constructor source in front of this class could not help either, since a
 * constructor can only be declared inside its own class; that copy has been
 * removed and a {@code FileInputStream} is passed to the reader instead.
 *
 * <p>Limitation: rows are split on every comma; quoted fields that themselves
 * contain commas are not supported.
 */
public class a {

    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_CSV_PATH = "D:\\BaiduNetdiskDownload\\1111.csv";

    /** Size, in chars, of the BufferedReader buffer. */
    private static final int READ_BUFFER_CHARS = 5 * 1024 * 1024;

    /** Number of trailing columns printed for each row. */
    private static final int TRAILING_COLUMNS = 3;

    /**
     * Reads the CSV file and prints one {@code ['first','mid','last']} line per row.
     *
     * <p>Rows with fewer than three fields are reported on {@code System.err}
     * and skipped instead of aborting the whole run.
     *
     * @param args optional; when present, {@code args[0]} is the path of the CSV
     *             file to read, otherwise {@link #DEFAULT_CSV_PATH} is used
     */
    public static void main(String[] args) {
        String csvPath = (args != null && args.length > 0) ? args[0] : DEFAULT_CSV_PATH;

        // Both resources are closed automatically, even when reading fails part-way.
        try (FileInputStream in = new FileInputStream(csvPath);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, StandardCharsets.UTF_8), READ_BUFFER_CHARS)) {
            // reader.readLine(); // uncomment to skip a header row
            long lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                // Limit -1 keeps trailing empty columns, so "last three" always
                // refers to the same columns even when a row ends with commas.
                String[] item = line.split(",", -1);
                if (item.length < TRAILING_COLUMNS) {
                    System.err.println("Skipping line " + lineNumber + ": expected at least "
                            + TRAILING_COLUMNS + " fields but found " + item.length);
                    continue;
                }
                String first = item[item.length - 3];
                String mid = item[item.length - 2];
                String last = item[item.length - 1];
                // int value = Integer.parseInt(last); // convert here if the column is numeric
                System.out.println("['" + first + "','" + mid + "','" + last + "']");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}