Write a Parquet file using schema from database query
Updated: Jan 24, 2023
package com.northconcepts.datapipeline.examples.parquet; import java.io.File; import java.sql.Connection; import com.northconcepts.datapipeline.core.DataReader; import com.northconcepts.datapipeline.core.StreamWriter; import com.northconcepts.datapipeline.examples.database.DB; import com.northconcepts.datapipeline.jdbc.JdbcReader; import com.northconcepts.datapipeline.job.Job; import com.northconcepts.datapipeline.parquet.ParquetDataReader; import com.northconcepts.datapipeline.parquet.ParquetDataWriter; public class WriteAParquetFileUsingSchemaFromDatabaseQuery { private static final String PARQUET_FILE = "example/data/output/WriteAParquetFileUsingSchemaFromDatabaseQuery.parquet"; public static void main(String[] args) throws Throwable { DB db = new DB(); // creates HSQL DB Connection connection = db.getConnection(); db.executeFile(new File("example/data/input/user_information.sql")); DataReader reader = new JdbcReader(connection, "SELECT * FROM user").setAutoCloseConnection(true); ParquetDataWriter writer = new ParquetDataWriter(new File(PARQUET_FILE)); // Set Parquet schema using a query // The query returns zero records as 1<0 condition is always false. writer.setSchema(connection, "SELECT * FROM user where 1<0"); // Set Parquet schema using a query with parameters // Optimized query returns only one record for give user_id. //writer.setSchema(connection, "SELECT * FROM user WHERE user_id=?", 1); // Set Parquet schema using a query with parameters & JdbcValueReader (default is OPINIONATED) // Optimized query retruns only limited records. // writer.setSchema(connection, JdbcValueReader.STRICT, "SELECT * FROM user WHERE user_role_id=?", 1); System.out.println("===================Generated Parquet Schema========================"); System.out.println(writer.getSchema()); System.out.println("==================================================================="); Job.run(reader, writer); System.out.println("=======================Reading Parquet File============================================"); reader = new ParquetDataReader(new File(PARQUET_FILE)); Job.run(reader, new StreamWriter(System.out)); } }