Source code for feed

"""
Feeder module

Takes features extracted from audio and feeds them to the Elasticsearch APIs,
which eventually create and store an index of all extracted features.
"""

import os
import json
from elasticsearch import Elasticsearch
from util.helper import convertToFloat


class Feeder:
    """
    Feeder class that provides helper functions to feed extracted
    features to elasticsearch in order to create indexes.
    """

    # Keys directly under content['tonal'] whose values are passed through
    # convertToFloat before indexing.
    _TONAL_FLOAT_KEYS = ('thpcp', 'chords_histogram')

    def __init__(self, es=None):
        """
        Initialize Feeder object with elastic search client

        Input:
            - es: optional, already constructed elasticsearch client. When
                  omitted, a client is created with ``Elasticsearch()``
                  (uses localhost, port 9200 as default).
        """
        self.es = Elasticsearch() if es is None else es

    def readContent(self, jsonFile):
        """
        Converts json data to dictionary format

        The values of ``tonal.thpcp``, ``tonal.chords_histogram`` and
        ``tonal.hpcp.max`` are passed through ``convertToFloat`` when they
        are present; every other entry is returned as loaded.

        Input:
            - jsonFile: a json file containing extracted features of an audio

        output:
            - content: features stored in dictionary format
        """
        with open(jsonFile, 'r') as jf:
            content = json.load(jf)

        # Only a dict-shaped 'tonal' section carries the fields to convert.
        tonal = content.get('tonal') if isinstance(content, dict) else None
        if not isinstance(tonal, dict):
            return content

        for key in self._TONAL_FLOAT_KEYS:
            if key in tonal:
                tonal[key] = convertToFloat(tonal[key])

        # 'hpcp' may be present without a 'max' entry; convert only if it is
        # there instead of failing with a KeyError.
        hpcp = tonal.get('hpcp')
        if isinstance(hpcp, dict) and 'max' in hpcp:
            hpcp['max'] = convertToFloat(hpcp['max'])

        return content

    def feed(self, jsonFile, cont_id, iname="audio_index", doc_type="audio"):
        """
        feed extracted features of an audio to elasticsearch for index creation

        Input:
            - jsonFile: a json file of extracted features of an audio
            - cont_id : identity of the content
            - iname   : index name for that group of content
            - doc_type: document type to be stored

        output:
            - retVal: a boolean return value. true if a new document was
                      created, otherwise false (for example when an existing
                      document with the same id was overwritten).
        """
        content = self.readContent(jsonFile)
        ret = self.es.index(index=iname, doc_type=doc_type,
                            id=cont_id, body=content)
        # Older servers answer with a boolean 'created' field; newer ones
        # dropped it and only report result == 'created' / 'updated'.
        if 'created' in ret:
            return ret['created']
        return ret.get('result') == 'created'

    def feedAll(self, jsonDir, iname="audio_index", doc_type="audio"):
        """
        feed extracted features of all audio from all the json files present
        in jsonDir to elasticsearch for index creation

        Only regular files whose name ends in ``.json`` are fed; other
        directory entries (sub-directories, files with another extension)
        are skipped.

        Input:
            - jsonDir : directory path that contains all json files
            - iname   : index name for that group of content
            - doc_type: document type to be stored

        output:
            - results: dictionary mapping each content id to the boolean
                       returned by feed() for that file
        """
        results = {}
        # sorted() makes the feeding order independent of the file system.
        for fname in sorted(os.listdir(jsonDir)):
            fpath = os.path.join(jsonDir, fname)
            # content id is extracted from the file name
            # file name is of format: id.json
            # splitext removes only the final extension, so an id that itself
            # contains dots is kept whole.
            cont_id, ext = os.path.splitext(fname)
            if ext.lower() != '.json' or not os.path.isfile(fpath):
                continue
            results[cont_id] = self.feed(fpath, cont_id, iname, doc_type)
        return results