Posted on

using jq to parse json

Here’s a quickly written script to simply query my json file.
The json file was created from an elasticsearch index using estab from https://github.com/miku/estab to dump the contents to a file.

estab -indices=myindex -raw > /tmp/myindex.json

I thought I would try using jq to query the resulting large json file. I found it a bit awkward to get the syntax correct and decided to write a simple frontend to allow me or others to query the json file more easily. This was a first attempt ….
<!DOCTYPE html>
<html>
<HEAD>
<?php
// ##  Set default values and GET / POST values
 
$scriptName = $_SERVER['SCRIPT_NAME'];
$datafile = "/var/www/html/easisearch/ela1.json";
$jq = exec('which jq', $retval);
$jq = $jq . " -r ";
$grepcmd = " | " . exec('which egrep', $retval);
$outputfile = "/var/www/html/tmp/jqoutput.csv";
$weboutputfile = "/tmp/jqoutput.csv";
$statusfile = "/tmp/status.txt";
$displaysize = 1000;
$grepargs = $_REQUEST['grepargs'];
$jqargs = $_REQUEST['jqargs'];

// this builds the contents of the select drop-down
// and is based on the structure of the json file 
$field[".aetopiaMetadata |.aetopiaId"] = "aetopiaId";
$field[".aetopiaMetadata |.metadata | .createDate"] = "aetopia-createDate";
$field[".aetopiaMetadata |.metadata | .id"] = "aetopia-id";
$field[".aetopiaMetadata |.metadata | .lastModifiedDate"] = "aetopia-lastModifiedDate";
$field[".aetopiaMetadata |.metadata | .metadata | .DOC_TEXT"] = "aetopia-DOC_TEXT";
$field[".aetopiaMetadata |.metadata | .originalFileName"] = "aetopia-origFileName";
$field[".synopsis"] = "synopsis";
$field[".title"] = "title";
$field[".usageTrafficLight"] = "usageTrafficLight";
$field[".tvFormat"] = "tvFormat";
$field[".transmissionMonth"] = "transmissionMonth";
$field[".transmissionDay"] = "transmissionDay";
$field[".transmissionDate"] = "transmissionDate";
$field[".transcription | .version"] = "transcription version";
$field[".transcription | .updated"] = "transcription updated";
$field[".transcription | .text"] = "transcription text";
$field[".thumbnailUrl"] = "thumbnailUrl";
$field[".sync | .requiresSearchSync"] = "requiresSearchSync";
$field[".sync | .lastTopicsUpdate"] = "lastTopicsUpdate";
$field[".sync | .lastSourceSync"] = "lastSourceSync";
$field[".sync | .lastSearchSync"] = "lastSearchSync";
$field[".sync | .lastSaved"] = "lastSaved";
$field[".sync | .enrichmentStatus"] = "enrichmentStatus";
$field[".sourceSystem"] = "sourceSystem";
$field[".sourceId"] = "sourceId";
$field[".owner"] = "owner";
$field[".modifiedDate"] = "modifiedDate";
$field[".clipName"] = "clipName";
$field[".clipDuration"] = "clipDuration";
$field[".assetType"] = "assetType";
$field[".archiveTapeIdItemNumber"] = "archiveTapeIdItemNumber";
$field[".archiveTapeId"] = "archiveTapeId";
$field[".createdDate"] = "createdDate";
$field[".deleted"] = "deleted";
$field[".deletedAtSource"] = "deletedAtSource";
$field[".files | .web | .url"] = "web url";
$field[".files | .web | .fileName"] = "web fileName";
$field[".files | .orig | .fileName"] = "orig fileName";
$field[".files | .orig | .url"] = "orig url";
$field[".id"] = "id";
$field[".lastModifiedBy"] = "lastModifiedBy";
 
// sort the array by value
asort($field);
// kill the job if Kill Job button is clicked 
if (isset($_REQUEST['killjob']))
   {
        exec("pkill jq");
   }
 // The json file can take a long time to parse
// so provide some way of indicating progress
if (isset($_REQUEST['chkprogress']))
     {
        $chkprogress = 1;
    }
 
print "easiSearch</HEAD><body>";
?>
<br/>
<form method="GET" action="<?php
echo htmlspecialchars($_SERVER['PHP_SELF']); ?>">
<table>
<tr><td valign="top" align="center">(SEARCH THIS)</td><td valign="top" align="center">  (FOR) </td><td align="center"  valign="top"> (OUTPUT THESE) </td> </tr>
<tr><td align="center"  valign="top">
<select  size=1 name="jqargsarray[]">
<?php
// build the dropdown list for which fields to search from
foreach($field as $x => $x_value)
        {
        print "<option value=\"$x\">$x_value</option>";
        }
 
?>
<option value="">All</option>
</select>
</td><td align="center"  valign="top">
   <input type="text" id="grepargs" name="grepargs"  value="<?php
echo $grepargs; ?>">
</td><td  valign="top" align="center">
<select multiple="multiple"  size=8 name="outputfieldsarray[]">
<?php
 // build the dropdown for which fields to output
foreach($field as $x => $x_value)
        {
        print "<option value=\"$x\">$x_value</option>";
        }
 
?>
  <option value="">All</option>
</select>
</td align="center"></tr>
</table><br/>
  <input type="submit" value="Search"  name="Search">
</form>
<?php
$jqargsarray = $_REQUEST['jqargsarray'];
// build the syntax for the jq command based on
// which fields have been selected
// if 'all' was selected, then just use grep
if ($jqargsarray[0] == "")
        {
        foreach($field as $x => $x_value)
                {
                $i++;
                if ($x != "")
                        {
                        $jqargs.= "($x)";
                        }
 
                if ($i < (count($field)))
                        {
                        $jqargs.= ",";
                        }
                }
 
        $jqargs = "'._source | [$jqargs] | @csv'";
        $pipetogrep = 1;
        }
  else
        {
        for ($i = 0; $i < count($jqargsarray); $i++)
                {
                $jqselect.= "select($jqargsarray[$i]|tostring|test(\"$grepargs\";\"i\")) |";
                $jqargs = "'._source | $jqselect ";
                }
        }
 
if ($grepargs == "")
        {
        $grepcmd = "";
        }
 
$outputfieldsarray = $_REQUEST['outputfieldsarray'];
 
if ($outputfieldsarray[0] == "")
        {
        foreach($field as $x => $x_value)
                {
                $outputfields.= "($x), ";
                }
 
        $outputfields = substr($outputfields, 0, -2);
        $outputfields = "[ $outputfields ]";
        $perlfilter = "";
        }
  else
        {
        if ($pipetogrep)
                {
 
                // need to parse out the fields using perl
  // if only some fields were selected
// this handles the condition where you are
// searching all fields for a value but only
// want to output selected fields
                $perlfilter = "| perl -MText::CSV -le '\$csv = Text::CSV->new({binary=>1}); while (\$row = \$csv->getline(STDIN)){print \"";
                foreach($outputfieldsarray as $sel)
                        {
                        $i = 0;
                        foreach($field as $x => $x_value)
                                {
                                if ($sel == $x)
                                        {
                                        $perlfilter.= "\$row->[$i],";
                                        break;
                                        }
 
                                $i++;
                                }
                        }
 
                $perlfilter.= "\"}'";
                }
          else
                {
                for ($i = 0; $i < count($outputfieldsarray); $i++)
                        {
                        $outputfields.= "($outputfieldsarray[$i])";
                        if ($i < (count($outputfieldsarray) - 1))
                                {
                                $outputfields.= ",";
                                }
                        }
 
                $outputfields = "[ $outputfields ]";
                }
        }
 
if (isset($_REQUEST['Search']))
        {
// kill any unfinished query if a new search is started
        exec("pkill jq");
        if ($pipetogrep)
                {
                $cmd = "cat $datafile | $jq $jqargs $grepcmd $grepargs $perlfilter";
                }
          else
                {
                $cmd = "cat $datafile | $jq $jqargs $outputfields | @csv'";
                }
        }
 
if ($chkprogress == 1)
        {
        getStatus();
        }
 
if (isset($_REQUEST['Search']))
        {
        exec("pkill jq");
        if ($pipetogrep)
                {
                $cmd = "cat $datafile | $jq $jqargs $grepcmd $grepargs $perlfilter";
                }
          else
                {
                $cmd = "cat $datafile | $jq $jqargs $outputfields | @csv'";
                }
 
        $handle = fopen("$statusfile", "w") or die("Unable to open file!");
        fwrite($handle, "$jqargs\n");
        fwrite($handle, "$cmd\n");
        fclose($handle);
        }
 
if ((isset($_REQUEST['Search'])) or ($chkprogress == 1))
        {
        print "<br/>executing:<br/> $cmd <br/><br/>\n";
        print "redirecting to: <a href=$weboutputfile> $outputfile</a><br/><br/>";
        $cmd.= " > $outputfile ";
        if (isset($_REQUEST['Search']))
                {
                $last_line = shell_exec("$cmd &");
                }
 
        print "<form method=\"POST\" action=\"";
        print htmlspecialchars($_SERVER['PHP_SELF']) . '">';
        print "<input type=\"submit\" value=\"Check progress\"  name=\"chkprogress\">";
        print "<input type=\"submit\" value=\"Kill job\"  name=\"killjob\">";
        print "</form>";
        }
 
if ($chkprogress)
        {
        showfile($outputfile);
        }
  else
        {
        if (!(isset($_REQUEST['Search'])))
                {
 
                // keep link to last job
                 getStatus();
                print "<br/>last executed:<br/> $cmd <br/><br/>\n";
                $filesize = filesize($outputfile);
                print "with $filesize bytes output to: <a href=$weboutputfile> $outputfile</a><br/><br/>";
                }
        }
 
// end of main program
 
function test_input($data)
        {
        $data = trim($data);
        $data = stripslashes($data);
        $data = htmlspecialchars($data);
        return $data;
        }
 
function showfile($filename)
        {
        exec("pgrep jq", $output, $return);
        if ($return == 0)
                {
                print "<br/>Still running!\n<br/><br/>";
                }
          else
                {
                print "<br/>No jq process running - assumed complete!\n<br/><br/>";
                }
 
        $filesize = filesize($filename);
        print "$filename file size: $filesize \n <br/>";
        $preview = 0;
        if ($preview)
                {
                if (file_exists($filename))
                        {
                        global $displaysize;
                        $displayed = 0;
                        $handle = fopen($filename, "r") or die("Unable to open file!");
 
                        //    {
 
                        if ($filesize > $displaysize)
                                {
                                print "<br/>Showing first $displaysize characters of $filesize\n<br/> ";
                                }
 
                        // print "<a href=$filename>$filename</a><br/><br/>\n";
 
                        while (!feof($handle))
                                {
                                $line = fgets($handle);
                                $displayed = strlen($line) + $displayed;
                                if ($displayed < $displaysize)
                                        {
                                        print "$line<br />";
                                        }
                                  else
                                        {
                                        break;
                                        }
                                }
 
                        fclose($handle);
                        }
                }
        }
 
function getStatus()
        {
        global $statusfile;
        global $jqargs;
        global $cmd;
        if (file_exists($statusfile))
                {
                $handle = fopen("$statusfile", "r");
                $jqargs = fgets($handle);
                $cmd = fgets($handle);
                fclose($handle);
                }
          else
                {
                $status = 1;
                }
 
        return $status;
        }
?>
</body>
</html>