实惨,为了用java读取超大csv文件,被 InputStreamReader折腾的不行

  • Post author:
  • Post category:java



实惨,为了用java读取超大csv文件


可以读一读小文件,但是我要弄的文件是接近10G的

package text;
import java.io.BufferedReader; 

import java.io.FileReader;
import java.io.IOException;
//import text1.InputStreamReader;    
	/** 
	 *读取csv文件 没有问题,但是无法读取大文件
	 The constructor InputStreamReader(BufferedReader, String) is undefined
	 */  
public class a{

	    public static void main(String[] args) {    
	        try {    
	            BufferedReader reader = new BufferedReader(new FileReader("D:\\BaiduNetdiskDownload\\1111.csv"));//换成你的文件名   
	            //BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);
	            //reader.readLine();//第一行信息,为标题信息,不用,如果需要,注释掉   
	            String line = null;    
	            while((line=reader.readLine())!=null){    
	                String item[] = line.split(",");//CSV格式文件为逗号分隔符文件,这里根据逗号切分   
	                String first = item[item.length-3];
	                String mid = item[item.length-2];
	                String last = item[item.length-1];//这就是你要的数据了   
	                //int value = Integer.parseInt(last);//如果是数值,可以转化为数值   
	                System.out.println("['"+first+"','"+mid+"','"+last+"']");    
	            }    
	        } catch (Exception e) {    
	            e.printStackTrace();    
	        }    
	    }    
}

这里是 InputStreamReader的四个重载,这是原文

https://blog.csdn.net/weixin_34050005/article/details/89654956?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522160511186719725225039658%2522%252C%2522scm%2522%253A%252220140713.130102334…%2522%257D&request_id=160511186719725225039658&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2

all

baidu_landing_v2~default-1-89654956.first_rank_ecpm_v3_pc_rank_v2&utm_term=inputstreamreader%E6%BA%90%E7%A0%81%E5%88%86%E6%9E%90&spm=1018.2118.3001.4449

   //创建一个使用默认字符集的InputStreamReader
    public InputStreamReader(InputStream in) {
        super(in);//Reader的lock是InputStream
        try {
            sd = StreamDecoder.forInputStreamReader(in, this, (String)null); // ## check lock object
        } catch (UnsupportedEncodingException e) {
            // The default encoding should always be available默认的字符集总是有效的,所以无参构造不会抛出
            throw new Error(e);
        }
    }
    //创建一个使用指定名字字符集的InputStreamReader
    public InputStreamReader(InputStream in, String charsetName)
        throws UnsupportedEncodingException
    {
        super(in);
        if (charsetName == null)
            throw new NullPointerException("charsetName");
        sd = StreamDecoder.forInputStreamReader(in, this, charsetName);
    }
    //创建一个使用给出的字符集的InputStreamReader
    public InputStreamReader(InputStream in, Charset cs) {
        super(in);
        if (cs == null)
            throw new NullPointerException("charset");
        sd = StreamDecoder.forInputStreamReader(in, this, cs);
    }
    //创建一个使用给出的字符集解码器的InputStreamReader
    public InputStreamReader(InputStream in, CharsetDecoder dec) {
        super(in);
        if (dec == null)
            throw new NullPointerException("charset decoder");
        sd = StreamDecoder.forInputStreamReader(in, this, dec);
    }
getEncoding

https://www.cnblogs.com/duanxz/p/4874712.html

我看了这篇文章,给出了

一类是使用BufferedReader类读写超大文件;

另一类是使用RandomAccessFile类读取,经过比较,最后使用了前一种方式

BufferedInputStream fis = new BufferedInputStream(new FileInputStream(file));    
BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);

好了,重头戏来了,大家细细品一品,InputStreamReader()能接收的是哪些参数?

InputStreamReader(InputStream in)

InputStreamReader(InputStream in, String charsetName)

InputStreamReader(InputStream in, Charset cs)

public InputStreamReader(InputStream in, CharsetDecoder dec)

这里有BufferedInputStream这个参数类型吗,,,

好的,其实你通篇看下来也只是看了个寂寞,因为没解决,除了你可以用小文件

如果有大佬经过,能够指导一二那就太好了

package text;
import java.io.BufferedReader; 


import java.io.FileReader;
import java.io.IOException;

//import text1.InputStreamReader;    
public InputStreamReader(InputStream in, String charsetName)
        throws UnsupportedEncodingException
    {
        super(in);
        if (charsetName == null)
            throw new NullPointerException("charsetName");
        sd = StreamDecoder.forInputStreamReader(in, this, charsetName);
    }

	/** 
	 *读取csv文件 没有问题,但是无法读取大文件
	 The constructor InputStreamReader(BufferedReader, String) is undefined
	 */  
public class a{

	    public static void main(String[] args) {    
	        try {    
	            BufferedReader fis = new BufferedReader(new FileReader("D:\\BaiduNetdiskDownload\\1111.csv"));//换成你的文件名   
	            BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);
	            //reader.readLine();//第一行信息,为标题信息,不用,如果需要,注释掉   
	            String line = null;    
	            while((line=reader.readLine())!=null){    
	                String item[] = line.split(",");//CSV格式文件为逗号分隔符文件,这里根据逗号切分   
	                String first = item[item.length-3];
	                String mid = item[item.length-2];
	                String last = item[item.length-1];//这就是你要的数据了   
	                //int value = Integer.parseInt(last);//如果是数值,可以转化为数值   
	                System.out.println("['"+first+"','"+mid+"','"+last+"']");    
	            }    
	        } catch (Exception e) {    
	            e.printStackTrace();    
	        }    
	    }    
}



版权声明:本文为weixin_45031830原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。