In this HackerRank Tag Content Extractor problem in Java, you are given a string of text in a tag-based language. Parse the text and retrieve the contents enclosed within sequences of well-organized tags that meet the following criteria:

  1. The name of the start and end tags must be the same. The HTML code <h1>Hello World</h2> is not valid, because the text starts with an h1 tag and ends with a non-matching h2 tag.
  2. Tags can be nested, but content between nested tags is considered not valid. For example, in <h1><a>contents</a>invalid</h1>, contents is valid but invalid is not valid.
  3. Tags can consist of any printable characters.


HackerRank Tag Content Extractor solution in java


HackerRank Tag content extractor problem solution in java programming.

import java.io.*;
import java.util.*;
import java.text.*;
import java.math.*;
import java.util.regex.*;

/**
 * Tag Content Extractor.
 *
 * Reads a test-case count from the first line of standard input, then that
 * many lines of tagged text. For each line it prints the text enclosed by
 * every matching tag pair (opening tag, non-empty text, closing tag with the
 * same name), one result per output line, or "None" when the line contains
 * no such pair.
 */
public class Solution {

    /**
     * Opening tag, non-empty content, and a closing tag repeating the same name.
     *
     * Group 1 is the tag name and group 2 is the content. Neither may contain
     * '<' or '>': a greedy ".+" for the name could swallow several tags at once
     * (for example "a><b" in "<a><b>text</a><b>") and report content that is
     * not actually enclosed by a matching pair.
     */
    private static final Pattern TAG_CONTENT =
            Pattern.compile("<([^<>]+)>([^<>]+)</\\1>");

    /**
     * Program entry point.
     *
     * @param args unused
     * @throws NumberFormatException if the first input line is not an integer
     * @throws java.util.NoSuchElementException if fewer lines are supplied than announced
     */
    public static void main(String[] args) {
        Scanner in = new Scanner(System.in);
        // trim() tolerates stray whitespace around the count.
        int t = Integer.parseInt(in.nextLine().trim());

        while (t-- > 0) {
            Matcher m = TAG_CONTENT.matcher(in.nextLine());

            // Single pass: print matches as they are found and remember whether
            // there were any, instead of probing with find() and then resetting.
            boolean found = false;
            while (m.find()) {
                System.out.println(m.group(2));
                found = true;
            }

            if (!found) {
                System.out.println("None");
            }
        }
    }
}


Second solution

import java.io.*;
import java.util.*;
import java.text.*;
import java.math.*;
import java.util.regex.*;

/**
 * Tag Content Extractor, strict-regex variant.
 *
 * The first line of standard input holds the number of test cases; each
 * following line is scanned for tag pairs whose opening and closing names are
 * identical. The enclosed text of every pair is printed on its own line, and
 * "None" is printed for a line without any pair.
 */
public class Solution{
   // Opening tag, non-empty text free of angle brackets, closing tag with the same name.
   private static final Pattern TAG_PAIR = Pattern.compile("<([^<>]+)>([^<>]+)</\\1>");

   public static void main(String[] args){
      Scanner input = new Scanner(System.in);

      for (int remaining = Integer.parseInt(input.nextLine()); remaining > 0; remaining--) {
         boolean printedAny = printContents(input.nextLine());
         if (!printedAny) {
            System.out.println("None");
         }
      }
   }

   /** Prints the content of each tag pair found in the text; returns whether any was printed. */
   private static boolean printContents(String text) {
      Matcher tagPair = TAG_PAIR.matcher(text);
      boolean printed = false;
      while (tagPair.find()) {
         System.out.println(tagPair.group(2));
         printed = true;
      }
      return printed;
   }
}


A solution in Java 8.

import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* Solution assumes we can't have the symbol "<" as text between tags */
/**
 * Tag Content Extractor.
 *
 * Reads the number of test cases from the first line of standard input and
 * then, for each following line, prints the text enclosed by every matching
 * tag pair, one per output line, or "None" if the line has no such pair.
 *
 * Solution assumes we can't have the symbol "<" as text between tags.
 */
public class Solution {

    /**
     * Opening tag (group 1 = name), non-empty content (group 2), and a closing
     * tag repeating the same name.
     *
     * The name is limited to characters other than '<' and '>': a greedy ".+"
     * could span several tags (for example "a><b" in "<a><b>text</a><b>") and
     * report content that no matching pair actually encloses. Compiled once
     * here rather than once per input line.
     */
    private static final Pattern TAG_CONTENT =
            Pattern.compile("<([^<>]+)>([^<]+)</\\1>");

    /**
     * Program entry point.
     *
     * @param args unused
     * @throws NumberFormatException if the first input line is not an integer
     * @throws java.util.NoSuchElementException if fewer lines are supplied than announced
     */
    public static void main(String[] args) {
        Scanner scan = new Scanner(System.in);
        // trim() tolerates stray whitespace around the count.
        int testCases = Integer.parseInt(scan.nextLine().trim());

        while (testCases-- > 0) {
            String line = scan.nextLine();
            Matcher m = TAG_CONTENT.matcher(line);

            boolean matchFound = false;
            while (m.find()) {
                System.out.println(m.group(2));
                matchFound = true;
            }
            if (!matchFound) {
                System.out.println("None");
            }
        }
    }
}