Source code for feed

"""
Feeder module

Takes features extracted from audio and feeds them to the Elasticsearch APIs,
which create and store an index of all the extracted features.
"""

import os
import json
from elasticsearch import Elasticsearch
from util.helper import convertToFloat


class Feeder:
    """
    Helper class that feeds extracted audio features to Elasticsearch
    in order to create indexes.
    """

    def __init__(self):
        """
        Initialize the Feeder object with an Elasticsearch client.

        The client is constructed without arguments, so it relies on the
        library defaults (documented here as localhost, port 9200).
        """
        self.es = Elasticsearch()

    def readContent(self, jsonFile):
        """
        Load a feature file and normalise selected tonal features.

        Input:
            - jsonFile: path of a json file containing the extracted
                        features of an audio

        Output:
            - content: the parsed json data. When a 'tonal' section is
                       present, its 'thpcp', 'chords_histogram' and
                       'hpcp' -> 'max' entries are passed through
                       convertToFloat; entries that are absent are
                       left untouched.
        """
        with open(jsonFile, 'r') as jf:
            content = json.load(jf)

        if 'tonal' in content:
            tonal = content['tonal']
            for key in ('thpcp', 'chords_histogram'):
                if key in tonal:
                    tonal[key] = convertToFloat(tonal[key])
            # Guard the nested 'max' key as well: an 'hpcp' section
            # without it previously raised KeyError.
            if 'hpcp' in tonal and 'max' in tonal['hpcp']:
                tonal['hpcp']['max'] = convertToFloat(tonal['hpcp']['max'])

        return content

    def feed(self, jsonFile, cont_id, iname="audio_index", doc_type="audio"):
        """
        Feed the extracted features of one audio to Elasticsearch.

        Input:
            - jsonFile: a json file of extracted features of an audio
            - cont_id : identity of the content
            - iname   : index name for that group of content
            - doc_type: document type to be stored

        Output:
            - retVal: True if the call created a new document,
                      otherwise False (e.g. an existing id was
                      overwritten).
        """
        content = self.readContent(jsonFile)
        ret = self.es.index(index=iname, doc_type=doc_type,
                            id=cont_id, body=content)
        # Older servers answer with a boolean 'created' flag; newer ones
        # drop it and report the outcome in 'result' instead.
        if 'created' in ret:
            return ret['created']
        return ret.get('result') == 'created'

    def feedAll(self, jsonDir, iname="audio_index", doc_type="audio"):
        """
        Feed the extracted features of every json file in a directory.

        Input:
            - jsonDir : directory path that contains all json files
            - iname   : index name for that group of content
            - doc_type: document type to be stored

        Entries that are not regular files ending in '.json'
        (sub-directories, hidden files, ...) are skipped.
        """
        for fname in os.listdir(jsonDir):
            fpath = os.path.join(jsonDir, fname)
            if not (fname.endswith('.json') and os.path.isfile(fpath)):
                continue
            # content id is extracted from the file name
            # file name is of format: id.json
            cont_id = fname.split(".")[0]
            self.feed(fpath, cont_id, iname, doc_type)