Read Parquet from Amazon S3 using a Temporary File

Updated: Feb 21, 2022
/*
 * Copyright (c) 2006-2022 North Concepts Inc.  All rights reserved.
 * Proprietary and Confidential.  Use is subject to license terms.
 * 
 * https://northconcepts.com/data-pipeline/licensing/
 */
package com.northconcepts.datapipeline.examples.amazons3;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;

import com.northconcepts.datapipeline.amazons3.AmazonS3FileSystem;
import com.northconcepts.datapipeline.core.DataReader;
import com.northconcepts.datapipeline.core.DataWriter;
import com.northconcepts.datapipeline.core.StreamWriter;
import com.northconcepts.datapipeline.job.Job;
import com.northconcepts.datapipeline.parquet.ParquetDataReader;

/**
 * Example: copies a Parquet object from Amazon S3 into a local temporary file,
 * then runs a {@link Job} that reads the file with a {@link ParquetDataReader}
 * and writes to {@code System.out} through a {@link StreamWriter}.
 *
 * <p>Replace {@link #ACCESS_KEY}, {@link #SECRET_KEY}, and the bucket / object
 * names used in {@link #downloadS3File()} with real values before running.
 */
public class ReadParquetFromAmazonS3UsingATemporaryFile {

    private static final String ACCESS_KEY = "YOUR ACCESS KEY";
    private static final String SECRET_KEY = "YOUR SECRET KEY";

    /** Local temporary copy of the S3 object; assigned by {@link #downloadS3File()}. */
    private static File parquetFile;

    /**
     * Downloads the Parquet file, runs the read job, and removes the temporary file.
     *
     * @param args unused
     * @throws Throwable if the download or the job fails
     */
    public static void main(String[] args) throws Throwable {
        try {
            // Download inside the try so a partially written temp file is still cleaned up.
            downloadS3File();

            DataReader reader = new ParquetDataReader(parquetFile);
            DataWriter writer = new StreamWriter(System.out);

            Job.run(reader, writer);
        } finally {
            // parquetFile stays null if the download failed before the temp file was created.
            if (parquetFile != null) {
                parquetFile.delete();
            }
        }
    }

    /**
     * Copies the object {@code input.parquet} from the bucket {@code bucket} into a
     * newly created temporary file and stores that file in {@link #parquetFile}.
     *
     * @throws Throwable if the S3 connection, the temp-file creation, or the copy fails
     */
    private static void downloadS3File() throws Throwable {
        AmazonS3FileSystem s3 = new AmazonS3FileSystem();
        try {
            s3.setBasicAWSCredentials(ACCESS_KEY, SECRET_KEY);
            s3.open();

            parquetFile = File.createTempFile("output", ".parquet");
            // Fallback cleanup in case the explicit delete in main() is never reached.
            parquetFile.deleteOnExit();

            // try-with-resources closes both streams (flushing the buffered output)
            // before the S3 connection is closed and before the file is read or deleted.
            try (InputStream in = s3.readFile("bucket", "input.parquet");
                    OutputStream out = new BufferedOutputStream(new FileOutputStream(parquetFile))) {
                byte[] buffer = new byte[1024];
                int lengthRead;
                // read() returns -1 at end of stream.
                while ((lengthRead = in.read(buffer)) != -1) {
                    out.write(buffer, 0, lengthRead);
                }
            }
        } finally {
            s3.close();
        }
    }
}

Mobile Analytics