M.C. Kang
Spring data iii
   Hello World Using Spring for Apache Hadoop

Declaring a Hadoop job using Spring’s Hadoop namespace
<configuration>
           fs.default.name=hdfs://localhost:9000
</configuration>
<job id="wordcountJob"
           input-path="/user/gutenberg/input"
           output-path="/user/gutenberg/output"
           mapper="org.apache.hadoop.examples.WordCount.TokenizerMapper"
           reducer="org.apache.hadoop.examples.WordCount.IntSumReducer"/>
           <job-runner id="runner" job="wordcountJob" run-at-startup="true"/>

This configuration will create a singleton instance of an org.apache.hadoop.mapreduce.Job managed by the
Spring container.
Spring can determine that outputKeyClass is of type org.apache.hadoop.io.Text and that
outputValueClass is of type org.apache.hadoop.io.IntWritable, so we do not need to set these
properties explicitly.

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
      private final static IntWritable one = new IntWritable(1);
      private Text word = new Text();
      public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                  word.set(itr.nextToken());
                  context.write(word, one);
            }
      }
}
   Hello World Using Spring for Apache Hadoop
public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
      private IntWritable result = new IntWritable();
      public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                       sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
      }
}


public class Main {
      private static final String[] CONFIGS = new String[] { "META-INF/spring/hadoop-context.xml" };

      public static void main(String[] args) {
            String[] res = (args != null && args.length > 0 ? args : CONFIGS);
            AbstractApplicationContext ctx = new ClassPathXmlApplicationContext(res);
            // shut down the context cleanly along with the VM
            ctx.registerShutdownHook();
      }
}
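Because run-at-startup is "true", the wordcountJob is submitted as soon as the context starts. If you would rather drive submission from code (for example, with run-at-startup="false"), a minimal sketch using the context created above and the plain Hadoop Job API:

      Job wordcountJob = ctx.getBean("wordcountJob", Job.class);
      // waitForCompletion blocks until the job finishes; it throws IOException,
      // InterruptedException and ClassNotFoundException
      boolean completed = wordcountJob.waitForCompletion(true);
      System.exit(completed ? 0 : 1);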
   Externalize the configuration parameters of the application
<context:property-placeholder location="hadoop-default.properties"/>
<configuration>
      fs.default.name=${hd.fs}
</configuration>
<job id="wordcountJob"
      input-path="${wordcount.input.path}"
      output-path="${wordcount.output.path}"
      mapper="org.apache.hadoop.examples.WordCount.TokenizerMapper"
      reducer="org.apache.hadoop.examples.WordCount.IntSumReducer"/>
<job-runner id="runner" job="wordcountJob" run-at-startup="true"/>

hd.fs=hdfs://localhost:9000
wordcount.input.path=/user/gutenberg/input/
wordcount.output.path=/user/gutenberg/output/


<context:property-placeholder location="hadoop-${ENV:default}.properties"/>
   Scripting HDFS on the JVM – Type1
<context:property-placeholder location="hadoop.properties"/>
<configuration>
      fs.default.name=${hd.fs}
</configuration>
<script id="setupScript" location="copy-files.groovy">
      <property name="localSourceFile" value="${localSourceFile}"/>
      <property name="hdfsInputDir" value="${hdfsInputDir}"/>
      <property name="hdfsOutputDir" value="${hdfsOutputDir}"/>
</script>



       Groovy Script
if (!fsh.test(hdfsInputDir)) {
      fsh.mkdir(hdfsInputDir);
      fsh.copyFromLocal(localSourceFile, hdfsInputDir);
      fsh.chmod(700, hdfsInputDir)
}
if (fsh.test(hdfsOutputDir)) {
      fsh.rmr(hdfsOutputDir)
}
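The fsh variable is not declared in the script; it is one of the implicit variables Spring for Apache Hadoop exposes to HDFS scripts. It is backed by Spring's FsShell class and mirrors the hadoop fs command line (test, mkdir, copyFromLocal, chmod, rmr, and so on).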
   Combining HDFS Scripting and Job Submission
<context:property-placeholder location="hadoop.properties"/>

<configuration>
      fs.default.name=${hd.fs}
</configuration>

<job id="wordcountJob"
      input-path="${wordcount.input.path}"
      output-path="${wordcount.output.path}"
      mapper="org.apache.hadoop.examples.WordCount.TokenizerMapper"
      reducer="org.apache.hadoop.examples.WordCount.IntSumReducer"/>

<script id="setupScript" location="copy-files.groovy">
       <property name="localSourceFile" value="${localSourceFile}"/>
       <property name="inputDir" value="${wordcount.input.path}"/>
       <property name="outputDir" value="${wordcount.output.path}"/>
</script>

<job-runner id="runner" run-at-startup="true"
      pre-action="setupScript"
      job="wordcountJob"/>
   Configuring the JobRunner to execute multiple HDFS scripts and jobs
<job-runner id="runner"
      pre-action="setupScript1,setupScript"
      job="wordcountJob1,wordcountJob2"
      post-action="cleanupScript1,cleanupScript2"/>
   Scheduling MapReduce Jobs with a TaskScheduler
<!-- job definition as before -->
<job id="wordcountJob" ... />
<!-- script definition as before -->
<script id="setupScript" ... />

<job-runner id="runner" pre-action="setupScript" job="wordcountJob"/>

<task:scheduled-tasks>
      <task:scheduled ref="runner" method="call" cron="3/30 * * * * ?"/>
</task:scheduled-tasks>
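The cron attribute uses Spring's six-field expression (second, minute, hour, day of month, month, day of week); 3/30 in the seconds field means the runner's call() method is invoked every 30 seconds, starting at second 3 of each minute.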



   Scheduling MapReduce Jobs with Quartz
<bean id="jobDetail"
      class="org.springframework.scheduling.quartz.MethodInvokingJobDetailFactoryBean">
      <property name="targetObject" ref="runner"/>
      <property name="targetMethod" value="run"/>
</bean>
<bean id="cronTrigger" class="org.springframework.scheduling.quartz.CronTriggerBean">
      <property name="jobDetail" ref="jobDetail"/>
      <property name="cronExpression" value="3/30 * * * * ?"/>
</bean>
<bean class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
      <property name="triggers" ref="cronTrigger"/>
</bean>
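Note that CronTriggerBean targets Quartz 1.x; on Quartz 2.x, Spring's CronTriggerFactoryBean is used instead, so the trigger bean class would need to change accordingly.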
   Creating and configuring a Hive server
<context:property-placeholder location="hadoop.properties,hive.properties" />
<configuration id="hadoopConfiguration">
       fs.default.name=${hd.fs}
       mapred.job.tracker=${mapred.job.tracker}
</configuration>
<hive-server port="${hive.port}" auto-startup="false"
       configuration-ref="hadoopConfiguration"
       properties-location="hive-server.properties">
       hive.exec.scratchdir=/tmp/hive/
</hive-server>



   Hive Thrift Client
The HiveClient is not a thread-safe class, so a new instance needs to be created inside
methods that are shared across multiple threads.

<hive-client-factory host="${hive.host}" port="${hive.port}"/>
   Hive Thrift Client Sample
@Repository
public class HivePasswordRepository implements PasswordRepository {
      private static final Log logger = LogFactory.getLog(HivePasswordRepository.class);
      private HiveClientFactory hiveClientFactory;
      private String tableName;
      // constructor and setters omitted
      @Override
      public Long count() {
            HiveClient hiveClient = hiveClientFactory.getHiveClient();
            try {
                   hiveClient.execute("select count(*) from " + tableName);
                   return Long.parseLong(hiveClient.fetchOne());
            // checked exceptions
            } catch (HiveServerException ex) {
                   throw translateException(ex);
            } catch (org.apache.thrift.TException tex) {
                   throw translateException(tex);
            } finally {
                   try {
                          hiveClient.shutdown();
                   } catch (org.apache.thrift.TException tex) {
                          logger.debug("Unexpected exception on shutting down HiveClient", tex);
                   }
            }
      }
      …
   Hive JDBC Client
The JDBC support for Hive lets you use your existing Spring knowledge of JdbcTemplate to
interact with Hive. Hive provides a HiveDriver class.

<bean id="hiveDriver" class="org.apache.hadoop.hive.jdbc.HiveDriver" />
<bean id="dataSource" class="org.springframework.jdbc.datasource.SimpleDriverDataSource">
      <constructor-arg name="driver" ref="hiveDriver" />
      <constructor-arg name="url" value="${hive.url}"/>
</bean>
<bean id="jdbcTemplate" class="org.springframework.jdbc.core.simple.JdbcTemplate">
      <constructor-arg ref="dataSource" />
</bean>
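The hive.url property is a regular Hive JDBC URL; for the original (HiveServer1) Thrift server it typically takes the form jdbc:hive://localhost:10000/default, with host, port, and database adjusted to your installation.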


   Hive JDBC Client Sample
@Repository
public class JdbcPasswordRepository implements PasswordRepository {
      private JdbcOperations jdbcOperations;
      private String tableName;
      // constructor and setters omitted
      @Override
      public Long count() {
            return jdbcOperations.queryForLong("select count(*) from " + tableName);
      }
      …
   Hive Script Runner
<context:property-placeholder location="hadoop.properties,hive.properties"/>

<configuration>
      fs.default.name=${hd.fs}
      mapred.job.tracker=${mapred.job.tracker}
</configuration>

<hive-server port="${hive.port}"
      properties-location="hive-server.properties"/>

<hive-client-factory host="${hive.host}" port="${hive.port}"/>

<hive-runner id="hiveRunner" run-at-startup="false" >
       <script location="apache-log-simple.hql">
              <arguments>
                     hiveContribJar=${hiveContribJar}
                     localInPath="./data/apache.log"
              </arguments>
       </script>
</hive-runner>
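The name=value pairs inside <arguments> are passed to the script as substitution variables, so apache-log-simple.hql can reference them (for example as ${hiveContribJar} and ${localInPath}).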
   Creating and configuring a Pig server
<context:property-placeholder location="hadoop.properties" />
<configuration>
      fs.default.name=${hd.fs}
      mapred.job.tracker=${mapred.job.tracker}
</configuration>
<pig-factory properties-location="pig-server.properties"
      <script location="initialization.pig">
            <arguments>
                  inputFile=${initInputFile}
            </arguments>
      </script>
</pig-factory>
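Because PigServer is not thread-safe and holds per-session state, <pig-factory> exposes a PigServerFactory bean rather than a single shared server; the nested initialization script is intended to run for the PigServer instances the factory creates (registering jars, setting parameters such as inputFile, and the like).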


   Pig Runner
Spring for Apache Hadoop provides the PigRunner helper class as a convenient way to repeatedly execute Pig jobs and also
to execute HDFS scripts before and after their execution.

<pig-runner id="pigRunner"
      pre-action="hdfsScript"
      run-at-startup="true" >
      <script location="password-analysis.pig">
            <arguments>
                  inputDir=${inputDir}
                  outputDir=${outputDir}
            </arguments>
      </script>
</pig-runner>
   Pig Runner Example
@Component
public class AnalysisService {
      private PigRunner pigRunner;
      @Autowired
      public AnalysisService(PigRunner pigRunner) {
             this.pigRunner = pigRunner;
      }
      @Async
      public void performAnalysis() {
             pigRunner.call();
      }
}
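@Async only takes effect if asynchronous method execution is enabled in the context, for example with <task:annotation-driven/> (or @EnableAsync in Java configuration); otherwise performAnalysis() simply runs on the caller's thread.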
   Controlling Runtime Script Execution
To have more runtime control over what Pig scripts are executed and the arguments passed into
them, we can use the PigTemplate class.

<pig-factory properties-location="pig-server.properties"/>
<pig-template/>
<beans:bean id="passwordRepository“
class="com.oreilly.springdata.hadoop.pig.PigPasswordRepository">
           <beans:constructor-arg ref="pigTemplate"/>
</beans:bean>

public class PigPasswordRepository implements PasswordRepository {
      private PigOperations pigOperations;
      private String pigScript = "classpath:password-analysis.pig";
      public void processPasswordFile(String inputFile) {
             Assert.notNull(inputFile);
             String outputDir = PathUtils.format(
                   "/data/password-repo/output/%1$tY/%1$tm/%1$td/%1$tH/%1$tM/%1$tS");
             Properties scriptParameters = new Properties();
             scriptParameters.put("inputDir", inputFile);
             scriptParameters.put("outputDir", outputDir);
             pigOperations.executeScript(pigScript, scriptParameters);
      }
      @Override
      public void processPasswordFiles(Collection<String> inputFiles) {
             for (String inputFile : inputFiles) {
                   processPasswordFile(inputFile);
             }
      }
}
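The repository is then injected like any other Spring bean; a hypothetical caller (the method name and file path are illustrative only, not from the original example):

      @Autowired
      private PasswordRepository passwordRepository;

      public void analyze() {
            // runs password-analysis.pig against the given input file
            passwordRepository.processPasswordFile("/data/password-repo/input/batch01.txt");
      }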
   HBASE Java Client w/o Spring Data
The HTable class is the main way in Java to interact with HBase. It allows you to put data into a
table using a Put class, get data by key using a Get class, and delete data using a Delete
class.
You query that data using a Scan class, which lets you specify key ranges as well as filter
criteria.

Configuration configuration = new Configuration(); // Hadoop configuration object
HTable table = new HTable(configuration, "users");
Put p = new Put(Bytes.toBytes("user1"));
p.add(Bytes.toBytes("cfInfo"), Bytes.toBytes("qUser"), Bytes.toBytes("user1"));
p.add(Bytes.toBytes("cfInfo"), Bytes.toBytes("qEmail"), Bytes.toBytes("user1@yahoo.com"));
p.add(Bytes.toBytes("cfInfo"), Bytes.toBytes("qPassword"), Bytes.toBytes("user1pwd"));
table.put(p);
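Reads go through the same raw API; a minimal sketch using Get and Scan against the table, column family, and qualifiers from the Put example above:

Get get = new Get(Bytes.toBytes("user1"));
Result result = table.get(get);
String password = Bytes.toString(result.getValue(Bytes.toBytes("cfInfo"), Bytes.toBytes("qPassword")));

Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("cfInfo"));          // restrict the scan to one column family
ResultScanner scanner = table.getScanner(scan);
try {
      for (Result row : scanner) {
            System.out.println(Bytes.toString(row.getValue(Bytes.toBytes("cfInfo"), Bytes.toBytes("qUser"))));
      }
} finally {
      scanner.close();                            // scanners hold server-side resources
}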
   HBASE Client w/ Spring Data HBaseTemplate
The HBase API requires that you work with the data as byte arrays and not other primitive types.
The HTable class is also not thread safe, and requires you to carefully manage the underlying
resources it uses and catch HBase-specific exceptions.
Spring’s HBaseTemplate class provides a higher-level abstraction for interacting with HBase.
As with other Spring template classes, it is thread-safe once created and provides exception
translation into Spring’s portable data access exception hierarchy.

<configuration>
      fs.default.name=hdfs://localhost:9000
</configuration>
<hbase-configuration configuration-ref="hadoopConfiguration" />
<beans:bean id="hbaseTemplate" class="org.springframework.data.hadoop.hbase.HbaseTemplate">
      <beans:property name="configuration" ref="hbaseConfiguration" />
</beans:bean>
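A sketch of how the template might be used, based on HbaseTemplate's callback-style find(tableName, family, RowMapper) method; the User class and the reuse of the cfInfo column family are illustrative assumptions, not part of the original slide:

public List<User> findAll() {
      return hbaseTemplate.find("users", "cfInfo", new RowMapper<User>() {
            @Override
            public User mapRow(Result result, int rowNum) throws Exception {
                  // byte[] values are converted back to Strings by the mapper
                  return new User(
                        Bytes.toString(result.getValue(Bytes.toBytes("cfInfo"), Bytes.toBytes("qUser"))),
                        Bytes.toString(result.getValue(Bytes.toBytes("cfInfo"), Bytes.toBytes("qEmail"))),
                        Bytes.toString(result.getValue(Bytes.toBytes("cfInfo"), Bytes.toBytes("qPassword"))));
            }
      });
}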