Java-Hadoop : Create a file in HDFS programmatically and write data into it

Here is a Java program, with its pom file, which lets you create a file in HDFS and write data into it. The pom file builds two jar files, one of which has all the dependencies bundled into it.

createFileHdfs.java

package com.abc.javahadoop;

import java.io.*;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class createFileHdfs {

static String newFolder;

public static void main(String args[]) throws IOException
{

newFolder = args[0];
Path newFolderPath= new Path(newFolder);

FileSystem hdfs =FileSystem.get(new Configuration());
Path homeDir=hdfs.getHomeDirectory();
System.out.println(“Home folder -” +homeDir);

if(hdfs.exists(newFolderPath))
{
hdfs.delete(newFolderPath, true);
}

hdfs.mkdirs(newFolderPath);

Path newFilePath=new Path(newFolder+”/newFile.txt”);
//hdfs.createNewFile(newFilePath);

StringBuilder sb=new StringBuilder();
for(int i=1;i<=5;i++)
{
sb.append(“Data”);
sb.append(i);
sb.append(“\n”);
}
byte[] byt=sb.toString().getBytes();
FSDataOutputStream fsOutStream = hdfs.create(newFilePath);
fsOutStream.write(byt);

fsOutStream.close();

}

}


pom.xml

<project xmlns=”http://maven.apache.org/POM/4.0.0&#8243; xmlns:xsi=”http://www.w3.org/2001/XMLSchema-instance&#8221;
xsi:schemaLocation=”http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd”&gt;
<modelVersion>4.0.0</modelVersion>

<groupId>com.abc</groupId>
<artifactId>javahadoop</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>

<name>javahadoop</name>
<url>http://maven.apache.org</url&gt;

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.0</version>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<!– Additional configuration. –>
<artifactSet>
<excludes>
<exclude>org.apache.hadoop:jar</exclude>
</excludes>
</artifactSet>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation=”org.apache.maven.plugins.shade.resource.ManifestResourceTransformer”>
<manifestEntries>
<Main-Class>com/abc/javahadoop/createFileHdfs</Main-Class>
<Build-Number>123</Build-Number>
</manifestEntries>
</transformer>
</transformers>
<finalName>uber-${artifactId}-${version}</finalName>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>

Advertisements

About shalishvj : My Experience with BigData

6+ years of experience using Bigdata technologies in Architect, Developer and Administrator roles for various clients. • Experience using Hortonworks, Cloudera, AWS distributions. • Cloudera Certified Developer for Hadoop. • Cloudera Certified Administrator for Hadoop. • Spark Certification from Big Data Spark Foundations. • SCJP, OCWCD. • Experience in setting up Hadoop clusters in PROD, DR, UAT , DEV environments.
This entry was posted in Java-Maven-Hadoop and tagged , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s